0

I need to create a Linux kernel module that hooks some system calls. With some of these calls I get a strange error, for example with __NR_unlinkat (sys_unlinkat). The code of the module:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kallsyms.h>
#include <linux/namei.h>
#include <linux/unistd.h>
#include <linux/highmem.h>
#include <linux/fdtable.h>
#include <linux/ptrace.h>
#include <linux/sched.h>


/* Module metadata: license, author and description strings. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Test");
MODULE_DESCRIPTION("Test test");

/*
 * Address of the kernel's system call table. Assigned in test_module_init()
 * from kallsyms_lookup_name("sys_call_table") and indexed by __NR_* numbers.
 */
static unsigned long **p_sys_call_table;


/*
 * Handler signature used for the unlinkat entry: the three system call
 * arguments are passed unpacked.
 * NOTE(review): on kernels built with CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 * (x86_64 since 4.17) table entries presumably take a single
 * `const struct pt_regs *` instead -- confirm before calling through this type.
 */
typedef asmlinkage long (*unlinkat_cb_t)(int dir_fd, const char __user    *pathname, int flags);
/* Original unlinkat table entry, saved at load time and restored at unload. */
static unlinkat_cb_t    original_unlinkat_syscall;


static int set_page_rw(void *address)
{
   unsigned int level;
   pte_t *pte = lookup_address((unsigned long)address, &level);

   if(pte->pte &~ _PAGE_RW)
      pte->pte |= _PAGE_RW;
   return 0;
}

int set_page_ro(void *address)
{
   unsigned int level;
   pte_t *pte = lookup_address((unsigned long)address, &level);
   pte->pte = pte->pte &~ _PAGE_RW;
   return 0;
}


static asmlinkage int wrapper_sys_unlinkat(int dir_fd, const char* filename, int flags)
{
    printk(KERN_WARNING "Test module: print something\n");
    return original_unlinkat_syscall(dir_fd, filename, flags);
}

// module init / stop functions


/*
 * test_module_init - module entry point.
 *
 * Resolves the system call table address, saves the original unlinkat entry
 * and replaces it with wrapper_sys_unlinkat. The page holding the table is
 * made writable for the duration of the store and then set read-only again.
 *
 * Returns 0 on success, -ENOENT when the sys_call_table symbol cannot be
 * resolved, or the error from set_page_rw() when the table page cannot be
 * made writable. Nothing is modified on failure.
 */
static int __init test_module_init(void)
{
    int ret;

    p_sys_call_table = (void*) kallsyms_lookup_name("sys_call_table");  // the most authoritative source
    if (!p_sys_call_table) {
        /* Indexing a NULL table below would oops; refuse to load instead. */
        printk(KERN_ERR "Test module: sys_call_table symbol not found\n");
        return -ENOENT;
    }

    printk(KERN_INFO "Test module 1: Start address of system syscall table: %p\n", p_sys_call_table);

    // save original handlers
    original_unlinkat_syscall  = (unlinkat_cb_t)  p_sys_call_table[__NR_unlinkat];

    // install our handlers
    ret = set_page_rw(p_sys_call_table);
    if (ret) {
        printk(KERN_ERR "Test module: cannot make syscall table writable\n");
        return ret;
    }

    p_sys_call_table[__NR_unlinkat]  = (void*) wrapper_sys_unlinkat;

    /*
     * The hook is live at this point, so a failure to restore the protection
     * must not fail the load: that would free the module while the table
     * still points into it.
     */
    set_page_ro(p_sys_call_table);

    printk(KERN_INFO "Test module: wrappers for syscalls installed\n");
    return 0;    // Non-zero return means that the module couldn't be loaded.
}


/*
 * test_module_cleanup - module exit point.
 *
 * Puts the saved original unlinkat handler back into the system call table.
 * The table page is made writable around the store and read-only afterwards.
 */
static void __exit test_module_cleanup(void)
{
    unsigned long **table = p_sys_call_table;

    /* Open the table page for writing, restore the slot, close it again. */
    set_page_rw(table);
    table[__NR_unlinkat] = (void*) original_unlinkat_syscall;
    set_page_ro(table);

    printk(KERN_INFO "Test module: Original handlers for syscalls restored\n");
}

/* Register the load and unload entry points of the module. */
module_init(test_module_init);
module_exit(test_module_cleanup);

The problem occurs with any operation placed before the call to original_unlinkat_syscall in the function wrapper_sys_unlinkat. In my example I use printk, but it may be any other operation. If I comment out the line printk(KERN_WARNING "Test module: print something\n"); the module works without errors. But with this line I get the following:

[16089.610751] Test module: print something
[16089.610759] BUG: unable to handle kernel paging request at 0000000080cb7fb8
[16089.611849] PGD 0 P4D 0
[16089.612237] Oops: 0000 [#3] SMP PTI
[16089.612582] Modules linked in: test_module(OE) bc_rc6(OE) bc_serpent(OE) bc_cast(OE) bc_rijn(OE) bc_idea(OE) bc_3des(OE) bc_bf128(OE) bc_bf448(OE) bc_twofish(OE) bc_gost(OE) bc_des(OE) bc_blowfish(OE) bestcrypt(OE) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set devlink nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security ebtable_filter ebtables ip6table_filter ip6_tables sunrpc snd_intel8x0 snd_ac97_codec ac97_bus snd_seq snd_seq_device intel_powerclamp
[16089.615814]  crct10dif_pclmul snd_pcm crc32_pclmul snd_timer ghash_clmulni_intel intel_rapl_perf joydev snd soundcore vboxguest video i2c_piix4 vboxvideo(C) drm_kms_helper ttm drm crc32c_intel e1000 serio_raw ata_generic pata_acpi
[16089.616897] CPU: 0 PID: 790 Comm: abrt-dump-journ Tainted: G      D  C OE     4.17.19-200.fc28.x86_64 #1
[16089.617690] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[16089.618118] RIP: 0010:__x64_sys_unlinkat+0x6/0x50
[16089.618494] RSP: 0018:ffffacc980cb7f10 EFLAGS: 00010246
[16089.618933] RAX: ffffffff882b35e0 RBX: 0000000080cb7f58 RCX: 0000000000000000
[16089.619314] RDX: 0000000000000000 RSI: ffffacc980cb7f58 RDI: 0000000080cb7f58
[16089.619728] RBP: ffffacc980cb7f58 R08: 0000000000000005 R09: 0000000000000261
[16089.620124] R10: 0000000000000000 R11: ffffffff8999b1ad R12: 0000000000000000
[16089.620507] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[16089.620889] FS:  00007fecca34cac0(0000) GS:ffff9e6d9fc00000(0000) knlGS:0000000000000000
[16089.621315] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[16089.621695] CR2: 0000000080cb7fb8 CR3: 00000001184fa000 CR4: 00000000000406f0
[16089.622117] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[16089.622569] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[16089.622950] Call Trace:
[16089.623333]  wrapper_sys_unlinkat+0x31/0x36 [test_module]
[16089.623715]  do_syscall_64+0x5b/0x160
[16089.624086]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[16089.624453] RIP: 0033:0x7fecc8c52b37
[16089.624811] RSP: 002b:00007ffde8e706a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
[16089.625179] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fecc8c52b37
[16089.625549] RDX: 0000000000000000 RSI: 00005611568c9130 RDI: 0000000000000023
[16089.625966] RBP: 00005611568c9130 R08: 0000000000000000 R09: 00000000000001a0
[16089.626330] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000000001a0
[16089.626820] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000023
[16089.627168] Code: fc ff ff 66 90 0f 1f 44 00 00 48 8b 7f 70 31 f6 31 d2 e8 5e e2 ff ff bf 9c ff ff ff 48 89 c6 e9 61 fc ff ff 90 0f 1f 44 00 00 53 <48> 8b 47 60 48 8b 4f 68 48 8b 5f 70 a9 ff fd ff ff 75 27 f6 c4
[16089.628225] RIP: __x64_sys_unlinkat+0x6/0x50 RSP: ffffacc980cb7f10
[16089.628760] CR2: 0000000080cb7fb8
[16089.629838] ---[ end trace 9f4543d566579223 ]---

This issue happens only on Fedora 28 x86_64 (Linux localhost.localdomain 4.17.19-200.fc28.x86_64 #1 SMP Fri Aug 24 15:47:41 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux); on the 32-bit version of Fedora 28 and on other Linux systems it works fine. Could anybody help me, please?

Nadezhda
  • 11
  • 3

1 Answers1

1

I've found an answer myself.

In Linux kernels prior to version 4.16, system call processing for the x86_64 architecture has some additional optimizations. Some of these optimizations require the system call handler to be implemented in the assembler. These kinds of handlers are either hard or impossible to replace with custom handlers written in C. Source

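So for new kernels my code doesn't work :(. Starting with 4.17, x86_64 system call handlers receive a single `struct pt_regs *` argument instead of the unpacked arguments, so a wrapper declared with the old three-argument signature passes a corrupted pointer to the original handler (note the truncated RDI value in the oops above). It seems that the code without any operations before the original system call worked only due to a gcc optimization: the wrapper was compiled into a plain jump to the original handler, which left the registers untouched. If I add -O0 to cflags, I get a system crash too.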

Nadezhda
  • 11
  • 3