CRiSP, DTrace, and other technobabble: ELF

After a hard battle, it looks like it works. I don't promise
this will work for everyone's executables, but it works for my
Linux 32- and 64-bit crisp executables.

I had to take a totally different direction for ELF32. For ELF32, we
don't have a gap between .text and the .data/.bss and other sections, so
we cannot shoehorn in the new .hash table.

Instead, we can overwrite the .gnu.hash with a .hash conforming table -
assuming our table is no larger than .gnu.hash. (.gnu.hash is supposed
to be smaller than .hash, but looks like for ELF32 binaries, this
is not the case).

Ideally, the next step is to package up the binary and/or source
so others can play with it.

I wrote my own ELF library, rather than rely on -lelf and -lgelf, since
I hit some horror stories in those libraries and didn't fancy debugging
or coping with older and newer libraries.

My elf library tries to hide some of the 32/64 bit ELF differences,
and is "not bad" - could do with more work to shield more differences,
but the design pattern of handling ELF32 + ELF64 is fine.

I also had to enhance my very old elfdump tool (like readelf and objdump),
since I was not happy with either of those: they work "mostly", but when
tracking down ELF brokenness, they can fall over or just ignore
the issue.

I attach the code below, which won't compile because it needs
my libraries, but if there's enough interest, I will make
them available.

/**********************************************************************/
/*                                                                    */
/*      CRiSP - Programmable editor                                   */
/*      ===========================                                   */
/*                                                                    */
/*  File:          elfrewrite.c                                       */
/*  Author:        P. D. Fox                                          */
/*  Created:       16 May 2010                                        */
/*                                                                    */
/*  Copyright (c) 2010, Foxtrot Systems Ltd                           */
/*                All Rights Reserved.                                */
/*                                                                    */
/*--------------------------------------------------------------------*/
/*  Description:  Tool to create portable Linux binaries              */
/*--------------------------------------------------------------------*/
/*   $Header: Last edited: 16-May-2010 1.1 $			      */
/*                                                                    */
/*   ld --hash-style=both [sysv|gnu|both]			      */
/**********************************************************************/

/*
gcc -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o elfrewrite elfrewrite.c -lelf
make make_foxlib ; gcc -g -I. -DCR_LINUX_X86_64 -Iinclude -Ifoxlib/elf -o /tmp/rw ~/tmp/rw.c bin/foxlib.a -lelf && cp /tmp/x1 /tmp/x && /tmp/rw /tmp/x
*/

# include <machine.h>
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
# include <unistd.h>
# include <libelf.h>
# include <elf.h>
# include <gelf.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <elf/elflib.h>

# define TRUE	1
# define FALSE	0

static int debug;	/* Set by -d: print each section/string examined. */
static int v_flag;	/* Set by -v: print hash table sizing details. */

char	*fname;			/* File currently being processed (set in main). */
int	patched = FALSE;	/* TRUE once a patch was applied; do_file() only writes then. */
int	patched_strtab = FALSE;	/* TRUE once a string table entry was rewritten. */
void	*hash_addr;		/* Address of the new .hash table; set by patch_hash(), */
				/* consumed by patch_dynamic(). NULL if none was built. */
elf_t	*elf;			/* Handle returned by elf_read() for the current file. */

/**********************************************************************/
/*   Prototypes.						      */
/**********************************************************************/
int get_hash_size(int n);
int do_file(void);
void patch_dynamic(void);
void patch_glibc_functions(void);
void patch_hash(void);

/**********************************************************************/
/*   Consume the leading "-xyz" style arguments. Each letter after    */
/*   the dash is one flag: 'd' turns on debug, 'v' turns on verbose.  */
/*   Unrecognised letters are ignored. Returns the index of the	      */
/*   first argument which does not start with '-' (or argc if	      */
/*   every argument was a switch).				      */
/**********************************************************************/
int do_switches(int argc, char **argv)
{	int	arg = 1;

	while (arg < argc && argv[arg][0] == '-') {
		const char *opt;

		for (opt = argv[arg] + 1; *opt != '\0'; opt++) {
			if (*opt == 'd')
				debug = TRUE;
			else if (*opt == 'v')
				v_flag = 1;
			}
		arg++;
		}
	return arg;
}
/**********************************************************************/
/*   Entry point: parse the switches, then rewrite each file named    */
/*   on the command line in turn. The per-file globals are reset      */
/*   before every file. Exit status is 0 if every file was handled,   */
/*   1 on a usage error or if do_file() reported a failure for any    */
/*   file (remaining files are still processed).		      */
/**********************************************************************/
int main(int argc, char **argv)
{
	int	i, arg_index;
	int	status = 0;

	arg_index = do_switches(argc, argv);

	if (arg_index >= argc) {
		printf("Usage: elfrewrite <a.out>\n");
		exit(1);
		}

	for (i = arg_index; i < argc; i++) {
		patched = FALSE;
		patched_strtab = FALSE;
		hash_addr = NULL;
		fname = argv[i];
		/* Do not lose a failure: remember it for the exit code. */
		if (do_file() < 0)
			status = 1;
		}
	exit(status);
}
/**********************************************************************/
/*   Process the file named by the global 'fname': read it, apply     */
/*   the three patch passes and, if any pass set 'patched', write     */
/*   the result back over the same file.			      */
/*   Returns 0 on success (including "nothing to patch") and -1 if    */
/*   the file could not be read or the patched image could not be     */
/*   written. Without HAVE_LIBELF_H this is a stub returning 0.	      */
/**********************************************************************/
int
do_file(void)
{
# if !defined(HAVE_LIBELF_H)
	printf("Sorry, libelf.h not on this system - so this is a dummy\n");
	return 0;
# else
	int	ret = 0;

	if ((elf = elf_read(fname)) == NULL) {
		/* Pre-set so we never print an indeterminate pointer. */
		char *cp = NULL;
		int err = elf_get_error(&cp);
		printf("ELF error: %d %s\n", err, cp ? cp : "(unknown)");
		return -1;
		}

	/***********************************************/
	/*   Patch GLIBC functions.		       */
	/***********************************************/
	patch_glibc_functions();

	/***********************************************/
	/*   Patch  the  .gnu.hash  or  add  a  .hash  */
	/*   hiding in the ELF.			       */
	/***********************************************/
	patch_hash();

	/***********************************************/
	/*   Patch  the  .dynamic entries, e.g. based  */
	/*   on the hash table updates.		       */
	/***********************************************/
	patch_dynamic();

	/***********************************************/
	/*   Now  save the results. A failed write is  */
	/*   reported to the caller, not swallowed.    */
	/***********************************************/
	if (patched && elf_write_file(elf, fname) < 0) {
		printf("Failed to write file\n");
		perror(fname);
		ret = -1;
		}

	elf_free(elf);
	elf = NULL;	/* Do not leave the global pointing at freed memory. */
	return ret;
# endif
}
/**********************************************************************/
/*   Patch  the  entries  which cause versioning issues or .gnu.hash  */
/*   issues on older glibcs.					      */
/*								      */
/*   Walks every entry of the .dynamic section (if there is one):     */
/*     - DT_VERNEED, DT_VERNEEDNUM and DT_VERSYM have their tag	      */
/*       replaced by a filler tag value, leaving the value as is.     */
/*     - DT_GNU_HASH is turned into DT_HASH pointing at the table     */
/*       recorded in 'hash_addr' by patch_hash(). If no table was     */
/*       recorded (hash_addr is NULL) the entry is left alone, so     */
/*       we never emit a DT_HASH with a zero address.		      */
/*   Sets 'patched' whenever an entry is modified.		      */
/**********************************************************************/
void
patch_dynamic()
{
	/* Tag value written over the entries we want to blank out. */
	enum { NEUTRAL_TAG = 0x8000000 };
	int	dynamic;
	char	*data;
	size_t	entsize, nent, n;

	/***********************************************/
	/*   Patch  the  Dynamic  section  for  GLIBC  */
	/*   dependencies.			       */
	/***********************************************/
	if ((dynamic = elf_get_section_by_name(elf, ".dynamic")) < 0)
		return;

	data = (char *) elf->e_sections[dynamic].s_data;
	if (data == NULL)
		return;

	/***********************************************/
	/*   Use  the  ELF  entry  layouts, not host   */
	/*   int/long,  so  the  stride is right on a  */
	/*   32-bit  host  too.  Only  whole  entries  */
	/*   are visited.			       */
	/***********************************************/
	entsize = elf->is_64 ? sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn);
	nent = (size_t) elf->e_sections[dynamic].s_size / entsize;

	for (n = 0; n < nent; n++) {
		char	*ent = data + n * entsize;
		Elf64_Dyn *d64 = (Elf64_Dyn *) ent;	/* valid if is_64 */
		Elf32_Dyn *d32 = (Elf32_Dyn *) ent;	/* valid otherwise */
		long long tag = elf->is_64 ? (long long) d64->d_tag
					   : (long long) d32->d_tag;
		unsigned long long val = elf->is_64
			? (unsigned long long) d64->d_un.d_val
			: (unsigned long long) d32->d_un.d_val;
		const char *what = NULL;

		switch (tag) {
		  case DT_VERNEED:
			what = "DT_VERNEED";
			break;

		  case DT_VERNEEDNUM:
			what = "DT_VERNEEDNUM";
			break;

		  case DT_VERSYM:
			what = "DT_VERSYM";
			break;

		  case DT_GNU_HASH:
			/* No replacement table: keep the entry intact. */
			if (hash_addr == NULL)
				break;
			printf("DT_GNU_HASH: %08llx -> %08lx\n",
				val, (unsigned long) hash_addr);
			if (elf->is_64) {
				d64->d_tag = DT_HASH;
				d64->d_un.d_ptr = (Elf64_Addr) (unsigned long) hash_addr;
				}
			else {
				d32->d_tag = DT_HASH;
				d32->d_un.d_ptr = (Elf32_Addr) (unsigned long) hash_addr;
				}
			patched = 1;
			break;

		  default:
			break;
		  }

		/***********************************************/
		/*   Symbol  versioning  entries:  overwrite   */
		/*   the tag, keep the value.		       */
		/***********************************************/
		if (what != NULL) {
			printf("%s: %08llx patched\n", what, val);
			if (elf->is_64)
				d64->d_tag = NEUTRAL_TAG;
			else
				d32->d_tag = NEUTRAL_TAG;
			patched = 1;
			}
		}
}
/**********************************************************************/
/*   Patch the versionised glibc functions to be non-versionised.     */
/*								      */
/*   Scans every SHT_STRTAB section string by string. Any string      */
/*   exactly equal to one of the names in versioned[] is overwritten  */
/*   in place with "sscanf" (which is shorter, so it always fits).    */
/*   Sets 'patched' and 'patched_strtab' when a string is changed.    */
/*   The scan never reads beyond the end of the section: a final      */
/*   string with no terminating NUL is ignored.			      */
/**********************************************************************/
void
patch_glibc_functions()
{
	static const char *const versioned[] = {
		"__isoc99_sscanf@@GLIBC_2.7",
		"__isoc99_sscanf",
		};
	static const char plain[] = "sscanf";
	int	i;

	for (i = 0; i < elf->e_shnum; i++) {
		char	*cp;
		char	*cpend;

		if (elf->e_sections[i].s_type != SHT_STRTAB)
			continue;

		cp = elf->e_sections[i].s_data;
		if (cp == NULL)
			continue;
		cpend = cp + elf->e_sections[i].s_size;
		if (debug)
			printf("Examining strings in %s\n", elf_section_name(elf, i));
		while (cp < cpend) {
			/* Locate the terminator inside the section only. */
			char	*nul = memchr(cp, '\0', (size_t) (cpend - cp));
			size_t	k;

			if (nul == NULL)
				break;
			if (debug)
				printf("str=%s\n", cp);
			for (k = 0; k < sizeof versioned / sizeof versioned[0]; k++) {
				if (strcmp(cp, versioned[k]) != 0)
					continue;
				strcpy(cp, plain);
				patched = TRUE;
				patched_strtab = TRUE;
				printf("%s: patched %s: %s -> %s\n",
					fname, elf->e_sections[i].s_name,
					versioned[k], plain
					);
				break;
				}
			/* 'nul' was found before the overwrite, so we  */
			/* step over the whole original string.	       */
			cp = nul + 1;
			}
		}
}
/**********************************************************************/
/*   Build  a  SysV  style  hash table (nbuckets, nchain, buckets,    */
/*   chains)  for  the  symbols  in  section  'dynsym', using names   */
/*   from section 'dynstr'. Symbol 0 is never hashed.		      */
/*   The symbol record size follows the ELF class of the file.	      */
/*   Returns  a  calloc'ed  table  (caller  frees)  and  stores its   */
/*   size  in  bytes  in  *w_sizep;  returns  NULL  if the section    */
/*   data is missing or memory could not be allocated.		      */
/**********************************************************************/
static Elf64_Word *
build_sysv_hash(int dynsym, int dynstr, int *w_sizep)
{	int	i;
	int	nchain, nbuckets, w_size;
	size_t	sym_size = elf->is_64 ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym);
	size_t	str_size = (size_t) elf->e_sections[dynstr].s_size;
	char	*syms = (char *) elf->e_sections[dynsym].s_data;
	char	*strs = (char *) elf->e_sections[dynstr].s_data;
	Elf64_Word *wp;
	Elf64_Word *bucket_array;
	Elf64_Word *chain_array;

	if (syms == NULL || strs == NULL)
		return NULL;

	nchain = (int) ((size_t) elf->e_sections[dynsym].s_size / sym_size);
	nbuckets = get_hash_size(nchain);
	w_size = (1 + /* zero entry unused */
		  1 + /* nbuckets */
		  1 + /* nchain */
		  nbuckets +
		  nchain) * sizeof(*wp);
	wp = calloc(w_size, 1);
	if (wp == NULL)
		return NULL;

	wp[0] = nbuckets;
	wp[1] = nchain;

	if (v_flag)
		printf("w_size=%d, buckets=%d, chains=%d\n", w_size, nbuckets, nchain);
	bucket_array = wp + 2;
	chain_array = bucket_array + nbuckets;

	for (i = 1; i < nchain; i++) {
		/* st_name is a 32-bit word in both symbol layouts. */
		Elf64_Word st_name = elf->is_64
			? ((Elf64_Sym *) syms)[i].st_name
			: ((Elf32_Sym *) syms)[i].st_name;
		int	h;

		/* Ignore a name offset outside the string table. */
		if (st_name >= str_size)
			continue;
		h = elf_hash((unsigned char *) (strs + st_name)) % nbuckets;
		/* Push symbol i on the front of bucket h's chain. */
		chain_array[i] = bucket_array[h];
		bucket_array[h] = i;
		}

	*w_sizep = w_size;
	return wp;
}
/**********************************************************************/
/*   Patch the hash table in for older glibc implementations.	      */
/*								      */
/*   Does  nothing  if  a  .hash section is already present, or if    */
/*   .dynsym/.dynstr  are  missing.  Otherwise  a  SysV hash table    */
/*   is  built  and  placed  in the image, and its address is left    */
/*   in  'hash_addr'  for  patch_dynamic().  Exits  if  the  table    */
/*   cannot be built or placed.					      */
/**********************************************************************/
void
patch_hash()
{	int	i;
	int	hash;
	int	gnu_hash;
	int	dynsym;
	int	dynstr;
	int	seg;
	int	w_size = 0;
	int	w_size1;
	size_t	old_size;
	char	*cp;
	Elf64_Word *wp;
	Elf64_Phdr *p;

	/***********************************************/
	/*   See if we have a .hash or .gnu_hash       */
	/***********************************************/
	hash = elf_get_section_by_name(elf, ".hash");
	gnu_hash = elf_get_section_by_name(elf, ".gnu.hash");
	if (hash > 0)
		return;

	if (debug && hash < 0) {
		if (gnu_hash < 0) 
			printf("Missing .hash and .gnu.hash sections\n");
		else
			printf("Missing .hash, but have .gnu.hash section\n");
		}

	/***********************************************/
	/*   Without  dynamic  symbols there is nothing */
	/*   to hash (and no valid section to index).  */
	/***********************************************/
	dynstr = elf_get_section_by_name(elf, ".dynstr");
	dynsym = elf_get_section_by_name(elf, ".dynsym");
	if (dynstr < 0 || dynsym < 0) {
		if (debug)
			printf("Missing .dynsym or .dynstr - no hash table built\n");
		return;
		}

	/***********************************************/
	/*   ELF32  can  only  reuse  .gnu.hash, so it  */
	/*   has to exist.			       */
	/***********************************************/
	if (elf->is_64 == FALSE && gnu_hash < 0)
		return;

	/***********************************************/
	/*   Create the hash table.		       */
	/***********************************************/
	wp = build_sysv_hash(dynsym, dynstr, &w_size);
	if (wp == NULL) {
		printf("Help! Unable to build the .hash table\n");
		exit(1);
		}

	/***********************************************/
	/*   Merge  this  into  the  executable  code  */
	/*   segment.  We  have  two  strategies. For  */
	/*   ELF32,  we  can  overwrite the .gnu.hash  */
	/*   section,  since our hash is smaller than  */
	/*   the    generated    one.    (This   isn't */
	/*   guaranteed,  but gets us out of a hole).  */
	/*   The  hole is that in ELF32, the data+bss  */
	/*   immediately  follows  the .text area and  */
	/*   there's  not  a  big  enough gap to slide */
	/*   our new hash table into the binary.       */
	/*   					       */
	/*   For  ELF64,  we tack the hash chain onto  */
	/*   the   last  data  section  -  since  the  */
	/*   .gnu.hash section is typically too small  */
	/*   for     us.     (We    could    probably  */
	/*   compress/optimise the hash table, but it  */
	/*   is touch and go).			       */
	/*   					       */
	/*   We   want   a   unified   approach,  but  */
	/*   debugging  and  handling sections moving  */
	/*   can  cause  us  to  need  to  do  a full  */
	/*   relink, which is even more treacherous.   */
	/***********************************************/

	/***********************************************/
	/*   Handle ELF32.			       */
	/***********************************************/
	if (elf->is_64 == FALSE) {
		if (elf_section_size(elf, gnu_hash) < w_size) {
			printf("Help! Need %d section, but only have %d (.gnu.hash)\n",
				 w_size, elf_section_size(elf, gnu_hash));
			exit(1);
			}
		hash_addr = elf_section_addr(elf, gnu_hash);
		memcpy(elf->e_sections[gnu_hash].s_data, wp, w_size);
		free(wp);
		return;
		}

	/***********************************************/
	/*   Handle ELF64.			       */
	/***********************************************/

	/***********************************************/
	/*   Round  up  size  to  a 4K boundary, else  */
	/*   kernel  will  KILL  the  binary  due  to  */
	/*   misalignment of the data LOAD Phdr.       */
	/***********************************************/
	w_size1 = (w_size | (0x1000 -1)) + 1;

	/***********************************************/
	/*   Find  first  writable  section - we want  */
	/*   the  one before that, so we can dump our  */
	/*   payload  in.  Give  up if there is none,  */
	/*   or  if  the one before it would be entry  */
	/*   0 of the section table.		       */
	/***********************************************/
	for (i = 1; i < elf->e_shnum; i++) {
		if (elf->e_sections[i].s_flags & SHF_WRITE)
			break;
	}
	if (i >= elf->e_shnum || i < 2) {
		printf("Help! Cannot find a section to extend for the .hash table\n");
		exit(1);
		}
	i--;

	/***********************************************/
	/*   Look  up  the  LOAD  segment  before  we  */
	/*   change  anything.  NOTE(review): assumes  */
	/*   a  negative  return  means "not found",   */
	/*   like elf_get_section_by_name - confirm.   */
	/***********************************************/
	seg = elf_phdr_find_by_type(elf, PT_LOAD);
	if (seg < 0) {
		printf("Help! No PT_LOAD program header found\n");
		exit(1);
		}

	if (debug || v_flag)
		printf("Extending seg#%d\n", i);

	/***********************************************/
	/*   Modify  the target segment to append our  */
	/*   payload.  The  buffer covers the rounded  */
	/*   size,  with the padding zero filled, so   */
	/*   s_size never exceeds the allocation.      */
	/***********************************************/
	old_size = (size_t) elf->e_sections[i].s_size;
	cp = calloc(old_size + (size_t) w_size1, 1);
	if (cp == NULL) {
		printf("Help! Out of memory extending section\n");
		exit(1);
		}
	memcpy(cp, elf->e_sections[i].s_data, old_size);
	memcpy(cp + old_size, wp, w_size);
	free(wp);
	free(elf->e_sections[i].s_data);
	elf->e_sections[i].s_data = cp;
	elf->e_sections[i].s_size += w_size1;

	/***********************************************/
	/*   We  need  to update the .dynamic section  */
	/*   to   put  our  .hash  "segment"  in,  so  */
	/*   remember  where  we  left it (the old end */
	/*   of the section, before sh_size grows).    */
	/***********************************************/
	hash_addr = (void *) (elf->e_shdr64[i].sh_addr + elf->e_shdr64[i].sh_size);

	/***********************************************/
	/*   This is horrible but necessary - destroy  */
	/*   the   ".eh_frame"   name   so  that  the  */
	/*   debugger  cannot  find  it, otherwise we  */
	/*   will   core  dump  gdb  when  hitting  a  */
	/*   breakpoint.			       */
	/***********************************************/
	elf->e_shdr64[i].sh_name++;

	/***********************************************/
	/*   Update  subsequent  sections  because we  */
	/*   moved  them in memory (these only affect  */
	/*   the loadable offset, not the p/v addr).   */
	/***********************************************/
	elf->e_shdr64[i].sh_size += w_size1;
	for (i++; i < elf->e_shnum; i++) {
		elf->e_shdr64[i].sh_offset += w_size1;
		}

	/***********************************************/
	/*   Now  update  the  program  header so the  */
	/*   kernel  can load the executable with the  */
	/*   updated  file  layout.  NOTE(review): as  */
	/*   before,  entry  seg+1  is assumed to be   */
	/*   the following (data) LOAD segment.	       */
	/***********************************************/
	p = (Elf64_Phdr *) elf_phdr_ptr(elf, 0);
	p[seg].p_memsz += w_size1;
	p[seg].p_filesz += w_size1;
	p[seg+1].p_offset += w_size1;
}
/**********************************************************************/
/*   Pick  the  number  of  .hash  buckets for a symbol table of n    */
/*   entries.  Walks  the  size  table  and  returns  the  entry      */
/*   just  before  the  first  one  that is >= n. If n is larger      */
/*   than  every  candidate  a  warning  is  printed and NBKTS is     */
/*   returned.							      */
/**********************************************************************/
int
get_hash_size(int n)
{
# define	NBKTS	2579
	/* Candidate bucket counts; the last slot is the fallback. */
	static const int bucket_sizes[] = {
		3, 17, 31, 37, 67, 97, 131, 197, 263, 397,
		619, 1039, 1553, 1709, 1949, 2711, 4019, 7177, NBKTS
		};
	const int last = (int) (sizeof bucket_sizes / sizeof bucket_sizes[0]) - 1;
	int	idx = 0;

	while (idx < last) {
		if (n <= bucket_sizes[idx + 1])
			return bucket_sizes[idx];
		idx++;
		}

	printf("Too many items to hash, please extend the array: %d\n", n);
	return NBKTS;
}
Post created by CRiSP v10.0.2c-b5917
CRiSP, DTrace, and other technobabble

Sunday, 19 December 2010

ELF - elfrewrite finished

No comments:

Post a Comment