Everyone should be familiar with the concept of brute forcing. In short, it is the act of trying every possible combination until one succeeds. It is most often used in password cracking and hash guessing.

We can brute force all kinds of problems in our lives; the only drawback is how much time we are willing to spend on it. I was curious whether we could write a simple Hello world! program in C this way.

To achieve this we should:

  1. Iterate over strings of valid C code characters.
  2. Try to compile it.
  3. Try to run it.
  4. Check whether the output matches the desired string, in this case Hello world.

I wrote a simple C program that performs all those steps in a loop until the code reaches 100 characters in length, just to check how long it would take.

[expand]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

// Size in bytes of the candidate-source buffer and of the output line buffer in main().
#define MAX_BUFF_LEN		100
// File the candidate source is written to and that tcc is asked to run.
#define HELLO_C_FILE_NAME	("hello.c")
// Output line a candidate program has to print for the search to stop.
#define DESIRED_STRING		("Hello world\n")
// A known-good hello world source; not referenced by the code shown here.
#define TEST_HELLO_WORLD	("#include <stdio.h>\nvoid main(){printf(\"Hello world\\n\");}")

/*
 * Advance `str` to the next candidate, treating it as an odometer whose
 * digits are the printable ASCII characters ' ' (32) .. '~' (126) and whose
 * least significant digit is the LAST character.
 *
 * When every character is already at '~', all of them are reset to ' ' and
 * the string grows by one ' ' character.
 *
 * str     - NUL-terminated string to modify in place
 * max_len - size in bytes of the buffer `str` lives in (terminator included)
 *
 * Returns 0 on success. Returns -1, leaving `str` untouched, when `str` is
 * NULL, empty, not terminated within `max_len` bytes, or when growing it
 * would not fit into `max_len` bytes.
 */
static inline int inc_string(char * str, unsigned int max_len)
{
	const char low_limit = 32; // as for space ' '
	const char high_limit = 126; // as for tilde '~'
	const char *nul;
	size_t len;
	size_t pos;

	if (str == NULL) {
		return -1;
	}

	// Bounded length lookup: never read past the caller's buffer.
	nul = memchr(str, '\0', max_len);
	if (nul == NULL) {
		return -1;
	}
	len = (size_t)(nul - str);
	if (len == 0) {
		return -1;
	}

	// Skip the trailing run of characters that are already at the limit;
	// str[pos - 1] is then the rightmost character that can still be bumped.
	pos = len;
	while (pos > 0 && str[pos - 1] >= high_limit) {
		pos--;
	}

	// Every position overflowed: we need one more character plus the
	// terminator. Refuse before modifying anything if that does not fit.
	if (pos == 0 && len + 2 > max_len) {
		return -1;
	}

	if (pos > 0) {
		// Bump exactly one character; it must NOT be re-checked for
		// overflow afterwards, otherwise "}~" would skip "~ ".
		str[pos - 1]++;
	}
	// Reset everything to the right of the bumped character.
	memset(str + pos, low_limit, len - pos);

	if (pos == 0) { // "~~~~" -> "     "
		str[len] = low_limit;
		str[len + 1] = '\0';
	}

	return 0;
}

/*
 * Read the whole file at `path` into `str` as a NUL-terminated string.
 *
 * path    - file to read
 * str     - destination buffer
 * max_len - size of `str` in bytes; the file must be strictly smaller so
 *           that the terminator fits
 *
 * Returns 1 on success and -1 on any failure (file cannot be opened, its
 * size cannot be determined, it is empty, it does not fit, or reading
 * fails). An empty or oversized file leaves `str` unmodified.
 */
static inline int read_file(const char * path, char * str, unsigned int max_len)
{
	FILE *fp;
	long file_size;
	size_t got;
	int rc = -1;

	if (path == NULL || str == NULL) {
		return(-1);
	}

	fp = fopen(path, "r");
	if (fp == NULL) {
		perror("Error readind previous hello.c code!");
		return(-1);
	}

	// Determine the file size; ftell() reports failure as -1, which must
	// never be handed to fread() as a length.
	if (fseek(fp, 0L, SEEK_END) != 0 || (file_size = ftell(fp)) < 0) {
		printf("Error reading 'hello.c'\n");
		goto out;
	}
	rewind(fp);

	// ">=" rather than ">": one byte is reserved for the terminator.
	if ((unsigned long)file_size >= max_len) {
		printf("File 'hello.c' biger then max_len. Missing null terminator?\n");
		goto out;
	}

	got = (file_size > 0) ? fread(str, 1, (size_t)file_size, fp) : 0;
	if (got == 0 || ferror(fp)) {
		printf("Error reading 'hello.c'\n");
		goto out;
	}
	str[got] = '\0'; // the file itself carries no terminator
	rc = 1;

out:
	fclose(fp);
	return rc;
}

/*
 * Replace the contents of the file at `path` with the string `str`.
 *
 * path    - file to create or truncate
 * str     - text to write; the terminator is not written
 * max_len - size of the buffer behind `str`; at most this many bytes are
 *           examined and written, so a missing terminator cannot cause an
 *           out-of-bounds read
 *
 * Returns 1 on success and -1 when the file cannot be opened or the data
 * cannot be completely written.
 */
static inline int write_file(const char * path, const char * str, unsigned int max_len)
{
	FILE *fp;
	const char *nul;
	size_t len;
	int ok;

	if (path == NULL || str == NULL) {
		return(-1);
	}

	nul = memchr(str, '\0', max_len);
	len = (nul != NULL) ? (size_t)(nul - str) : max_len;

	fp = fopen(path, "w");
	if (fp == NULL) {
		perror("Error writing hello.c code!");
		return(-1);
	}

	ok = (fwrite(str, 1, len, fp) == len);
	// fclose() flushes, so a full disk may only show up here.
	if (fclose(fp) != 0) {
		ok = 0;
	}
	if (!ok) {
		perror("Error writing hello.c code!");
		return(-1);
	}
	return 1;
}

/*
 * Brute-force driver.
 *
 * Starting from the contents of a previous hello.c (or from " " when there
 * is none), repeatedly:
 *   1. write the current candidate to hello.c,
 *   2. run it with "tcc -run" and read its standard output line by line,
 *   3. stop with success if a line equals DESIRED_STRING,
 *   4. otherwise advance to the next candidate.
 *
 * Returns 0 when a matching program was found, EXIT_FAILURE when the search
 * space is exhausted or a file/pipe operation fails.
 *
 * NOTE: a candidate that never terminates will block pclose() forever; no
 * timeout is applied here.
 */
int main(void)
{
	char buff[MAX_BUFF_LEN] = " ";
	char ret_buff[MAX_BUFF_LEN];
	char command[100];
	FILE *fp;
	int n;

	n = snprintf(command, sizeof(command), "tcc -run %s 2> /dev/null", HELLO_C_FILE_NAME);
	if (n < 0 || (size_t)n >= sizeof(command)) {
		fprintf(stderr, "Command line does not fit into the buffer\n");
		return EXIT_FAILURE;
	}

	// Resume from a previous run if possible. On failure the buffer may
	// have been partially overwritten, so restore the initial candidate.
	if (read_file(HELLO_C_FILE_NAME, buff, MAX_BUFF_LEN) < 0) {
		memset(buff, 0, sizeof(buff));
		buff[0] = ' ';
	}
	buff[MAX_BUFF_LEN - 1] = '\0'; // never trust the file to be terminated

	for (;;) {
		int found = 0;

		// Write first, so that the very first candidate is tested too.
		if (write_file(HELLO_C_FILE_NAME, buff, MAX_BUFF_LEN) < 0) {
			return EXIT_FAILURE;
		}

		fp = popen(command, "r");
		if (fp == NULL) {
			perror("popen");
			return EXIT_FAILURE;
		}
		while (fgets(ret_buff, sizeof(ret_buff), fp) != NULL) {
			// fgets() always terminates ret_buff, so strcmp() is safe
			// and does not look at bytes beyond a short line.
			if (strcmp(ret_buff, DESIRED_STRING) == 0) {
				found = 1;
				break;
			}
		}
		pclose(fp);

		if (found) {
			printf("YESSS\n");
			return 0;
		}

		if (inc_string(buff, MAX_BUFF_LEN) < 0) {
			printf("Search space exhausted without a match\n");
			return EXIT_FAILURE;
		}
	}
}

[/expand]

I compiled it with gcc using the -Ofast flag, ran it on a RAM disk mounted with sudo mount -t tmpfs -o size=10m ramdisk ./ramdisk, and waited…

And it never finished. After two hours it had only reached strings of five characters. So my curiosity is satisfied, and now I know that GitHub Copilot or an army of interns is the closest we will get to automatic code generation.