Panel użytkownika
Nazwa użytkownika:
Hasło:
Nie masz jeszcze konta?

[KVM] Maszyna startująca w trybie długim

Ostatnio zmodyfikowano 2017-04-26 04:09
Autor Wiadomość
Elaine
Temat założony przez niniejszego użytkownika
[KVM] Maszyna startująca w trybie długim
» 2017-04-26 04:09:08
Przykład maszyny wirtualnej startującej od razu w trybie długim — raz, że potrzebne na starszych procesorach, które nie wspierają sprzętowej wirtualizacji trybu rzeczywistego, dwa, że ładniej wygląda niż maszyna zaczynająca w trybie rzeczywistym.

Większość tego kodu to wrappery, żeby się z gołymi ioctlami na każdym kroku nie bawić. Właściwe ustawienie stanu maszyny znajduje się w setup_long_mode.

Kod wykonywany przez maszynę wirtualną jest prosty i trochę bez sensu, bo to tylko przykład — nie chodzi o to, by robił coś pożytecznego, tylko żeby pokazywał, że maszyna faktycznie wstaje od razu w trybie długim, w ring 0:
push %rdi
mov %cr2, %rcx
movabs $0x123456789ABCDEF0, %rax
hlt

Kompletny kod:
C/C++
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <errno.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

// Move-only owner of a POSIX file descriptor. The descriptor is closed when
// the owner is destroyed. A negative value (the move constructor stores -1 in
// its source) marks an object that owns nothing.
class file_descriptor {
public:
    // Takes ownership of `fd`.
    explicit file_descriptor( int fd ) noexcept;
    // Closes the owned descriptor, if there is one.
    ~file_descriptor() noexcept;
    // Transfers ownership; `other` is left owning nothing.
    file_descriptor( file_descriptor && other ) noexcept;
    // Assignment takes its argument by value and swaps with it. Since copying
    // is deleted, only rvalues can be assigned; the previously owned
    // descriptor is closed when the by-value parameter is destroyed.
    file_descriptor & operator =( file_descriptor other ) noexcept;
    file_descriptor( const file_descriptor & ) = delete;
    // Returns the raw descriptor without giving up ownership.
    int get() const noexcept;
private:
    int fd;
};

file_descriptor::file_descriptor( int fd ) noexcept
    : fd( fd )
{
}

file_descriptor::~file_descriptor() noexcept
{
    // A moved-from object holds -1. Calling close() on it would be a failing
    // system call whose only effect is to overwrite errno with EBADF, so skip it.
    if( fd >= 0 ) {
        close( fd );
    }
}

file_descriptor::file_descriptor( file_descriptor && other ) noexcept
    : fd( std::exchange( other.fd, - 1 ) )
{
}

file_descriptor & file_descriptor::operator =( file_descriptor other ) noexcept
{
    std::swap( fd, other.fd );
    return * this;
}

int file_descriptor::get() const noexcept
{
    return fd;
}


// Exception used for every failure reported by this program's wrappers.
// Besides the runtime_error message it carries an integer error code
// (callers in this file pass errno or an errno constant such as EINVAL).
class kvm_error : public std::runtime_error {
    int error_code;

public:
    // `message` becomes what(); `code` is returned by code().
    explicit kvm_error( const char * message, int code )
        : std::runtime_error( message )
        , error_code( code )
    {
    }

    // The error code given at construction.
    int code() const noexcept
    {
        return error_code;
    }
};

class kvm_version_error
    : public kvm_error
{
public:
    explicit kvm_version_error();
};

kvm_version_error::kvm_version_error()
    : kvm_error( "Unknown KVM API version", EINVAL )
{
}

template < typename T >
T throw_if_failed( T result, const char * message )
{
    if( result == - 1 ) {
        throw kvm_error( message, errno );
    }
    return result;
}


class memory_block {
public:
    explicit memory_block( std::size_t size_in_bytes );
    explicit memory_block( std::size_t size_in_bytes, int fd );
    ~memory_block() noexcept;
    memory_block( memory_block && other ) noexcept;
    memory_block & operator =( memory_block other ) noexcept;
    memory_block( const memory_block & ) = delete;
   
    void * data() noexcept;
    std::size_t size() noexcept;
    template < typename T >
    void store( std::uintptr_t offset, const T & value ) noexcept;
    void store_bytes( std::uintptr_t offset, const void * source,
    std::size_t how_many ) noexcept;
private:
    void * block_begin;
    std::size_t block_bytes;
};

memory_block::memory_block( std::size_t size_in_bytes )
    : block_begin( mmap( nullptr, size_in_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, - 1, 0 ) )
     , block_bytes( size_in_bytes )
{
    if( block_begin == MAP_FAILED ) {
        throw kvm_error( "Memory allocation failed", errno );
    }
}

memory_block::memory_block( std::size_t size_in_bytes, int fd )
    : block_begin( mmap( nullptr, size_in_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 ) )
     , block_bytes( size_in_bytes )
{
    if( block_begin == MAP_FAILED ) {
        throw kvm_error( "File mapping failed", errno );
    }
}

memory_block::~memory_block() noexcept
{
    munmap( block_begin, block_bytes );
}

memory_block::memory_block( memory_block && other ) noexcept
    : block_begin( other.block_begin )
     , block_bytes( other.block_bytes )
{
    other.block_begin = MAP_FAILED;
    other.block_bytes = 0;
}

memory_block & memory_block::operator =( memory_block other ) noexcept
{
    std::swap( block_begin, other.block_begin );
    std::swap( block_bytes, other.block_bytes );
    return * this;
}

void * memory_block::data() noexcept
{
    return block_begin;
}

std::size_t memory_block::size() noexcept
{
    return block_bytes;
}

template < typename T >
void memory_block::store( std::uintptr_t offset, const T & value ) noexcept
{
    static_assert( std::is_standard_layout < T >::value, "" );
    store_bytes( offset, & value, sizeof( T ) );
}

void memory_block::store_bytes( std::uintptr_t offset, const void * source,
std::size_t how_many ) noexcept
{
    memcpy( static_cast < char *>( block_begin ) + offset, source, how_many );
}


class kvm_machine;

class kvm_control_device {
public:
    kvm_control_device();
    kvm_machine create_machine();
    unsigned get_cpu_control_block_size();
private:
    file_descriptor fd;
};

class kvm_cpu;

class kvm_machine {
public:
    void set_memory_region( std::uint32_t slot, std::uint64_t guest_address,
    memory_block & memory );
    kvm_cpu create_cpu( unsigned long cpu_id );
private:
    friend class kvm_control_device;
    explicit kvm_machine( int fd ) noexcept;
   
    file_descriptor fd;
};

class kvm_cpu_control_block;

class kvm_cpu {
public:
    kvm_cpu_control_block get_control_block( kvm_control_device & control );
    kvm_regs get_registers();
    void set_registers( const kvm_regs & regs );
    kvm_sregs get_special_registers();
    void set_special_registers( const kvm_sregs & regs );
    void run();
private:
    friend class kvm_machine;
    explicit kvm_cpu( int fd ) noexcept;
   
    file_descriptor fd;
};

class kvm_cpu_control_block {
public:
    kvm_run & get() noexcept;
    kvm_run & operator *() noexcept;
    kvm_run * operator ->() noexcept;
private:
    friend class kvm_cpu;
    explicit kvm_cpu_control_block( std::size_t size, int fd );
   
    memory_block block;
};

// Opens /dev/kvm read/write (close-on-exec) and checks that the API version
// it reports equals the KVM_API_VERSION this program was compiled against.
// Throws kvm_error if the open or the query fails, kvm_version_error on a
// version mismatch.
kvm_control_device::kvm_control_device()
    : fd( throw_if_failed( open( "/dev/kvm", O_RDWR | O_CLOEXEC ), "Cannot open /dev/kvm" ) )
{
    const int reported_version = throw_if_failed(
        ioctl( fd.get(), KVM_GET_API_VERSION, nullptr ),
        "Cannot get KVM API version" );
    const bool version_matches = ( reported_version == KVM_API_VERSION );
    if( !version_matches ) {
        throw kvm_version_error();
    }
}

// Creates a new virtual machine and returns the object owning its descriptor.
// Throws kvm_error if KVM_CREATE_VM fails.
kvm_machine kvm_control_device::create_machine()
{
    const int machine_fd = throw_if_failed(
        ioctl( fd.get(), KVM_CREATE_VM, nullptr ), "VM creation failed" );
    return kvm_machine( machine_fd );
}

unsigned kvm_control_device::get_cpu_control_block_size()
{
    const auto size = throw_if_failed(
    ioctl( fd.get(), KVM_GET_VCPU_MMAP_SIZE, nullptr ),
    "Getting CPU control block size failed" );
    if( size < static_cast < std::ptrdiff_t >( sizeof( kvm_run ) ) ) {
        throw kvm_error( "CPU control block size too small", EINVAL );
    }
    return static_cast < unsigned >( size );
}


void kvm_machine::set_memory_region( std::uint32_t slot,
std::uint64_t guest_address, memory_block & memory )
{
    kvm_userspace_memory_region region = { };
    region.slot = slot;
    region.guest_phys_addr = guest_address;
    region.userspace_addr = reinterpret_cast < std::uintptr_t >( memory.data() );
    region.memory_size = memory.size();
    throw_if_failed( ioctl( fd.get(), KVM_SET_USER_MEMORY_REGION, & region ),
    "Setting memory region failed" );
}

// Creates a virtual CPU with id `cpu_id` in this machine and returns the
// object owning its descriptor. Throws kvm_error if KVM_CREATE_VCPU fails.
kvm_cpu kvm_machine::create_cpu( unsigned long cpu_id )
{
    const int cpu_fd = throw_if_failed(
        ioctl( fd.get(), KVM_CREATE_VCPU, cpu_id ),
        "Virtual CPU creation failed" );
    return kvm_cpu( cpu_fd );
}

kvm_machine::kvm_machine( int fd ) noexcept
    : fd( fd )
{
}


// Maps this CPU's descriptor as a control block whose size is whatever
// `control` reports. Propagates kvm_error from the size query or the mapping.
kvm_cpu_control_block kvm_cpu::get_control_block( kvm_control_device & control )
{
    return kvm_cpu_control_block( control.get_cpu_control_block_size(), fd.get() );
}

// Reads the general-purpose registers with KVM_GET_REGS.
// Throws kvm_error if the ioctl fails.
kvm_regs kvm_cpu::get_registers()
{
    kvm_regs registers;
    const int status = ioctl( fd.get(), KVM_GET_REGS, & registers );
    throw_if_failed( status, "Getting registers failed" );
    return registers;
}

void kvm_cpu::set_registers( const kvm_regs & regs )
{
    throw_if_failed( ioctl( fd.get(), KVM_SET_REGS, & regs ),
    "Setting registers failed" );
}

// Reads the special registers with KVM_GET_SREGS.
// Throws kvm_error if the ioctl fails.
kvm_sregs kvm_cpu::get_special_registers()
{
    kvm_sregs registers;
    const int status = ioctl( fd.get(), KVM_GET_SREGS, & registers );
    throw_if_failed( status, "Getting special registers failed" );
    return registers;
}

void kvm_cpu::set_special_registers( const kvm_sregs & regs )
{
    throw_if_failed( ioctl( fd.get(), KVM_SET_SREGS, & regs ),
    "Setting special registers failed" );
}

void kvm_cpu::run()
{
    throw_if_failed( ioctl( fd.get(), KVM_RUN, nullptr ), "Run failed" );
}

kvm_cpu::kvm_cpu( int fd ) noexcept
    : fd( fd )
{
}


// Returns the kvm_run structure located at the start of the mapped block.
kvm_run & kvm_cpu_control_block::get() noexcept
{
    kvm_run * const run_area = static_cast < kvm_run *>( block.data() );
    return * run_area;
}

// Dereference: the kvm_run structure located at the start of the mapped block.
kvm_run & kvm_cpu_control_block::operator *() noexcept
{
    kvm_run * const run_area = static_cast < kvm_run *>( block.data() );
    return * run_area;
}

// Member access: pointer to the kvm_run structure at the start of the block.
kvm_run * kvm_cpu_control_block::operator ->() noexcept
{
    void * const mapping = block.data();
    return static_cast < kvm_run *>( mapping );
}

kvm_cpu_control_block::kvm_cpu_control_block( std::size_t size, int fd )
    : block( size, fd )
{
}

constexpr std::uintptr_t program_base = 0x0000;
constexpr std::uintptr_t paging_structures_base = 0x3000;

void setup_long_mode( kvm_cpu & cpu, memory_block & guest_memory )
{
    auto special_registers = cpu.get_special_registers();
    // Turn on all the flags required for long mode (CR0.PG, CR0.PE, CR4.PAE,
    // EFER.LME, EFER.LMA)
    special_registers.cr0 |= UINT64_C( 1 ) << 31;
    special_registers.cr0 |= UINT64_C( 1 ) << 0;
    special_registers.cr4 |= UINT64_C( 1 ) << 5;
    special_registers.efer |= UINT64_C( 1 ) << 8;
    special_registers.efer |= UINT64_C( 1 ) << 10;
    // Set up identity paging for the first 2 MB.
    constexpr std::uintptr_t pdpte_address = paging_structures_base + 0x1000;
    constexpr std::uintptr_t pte_address = paging_structures_base + 0x2000;
    constexpr std::uint64_t pml4_entry = pdpte_address | 0x003;
    constexpr std::uint64_t pdpte_entry = pte_address | 0x003;
    constexpr std::uint64_t pte_entry = 0x0 | 0x083;
    guest_memory.store( paging_structures_base, pml4_entry );
    guest_memory.store( pdpte_address, pdpte_entry );
    guest_memory.store( pte_address, pte_entry );
   
    special_registers.cr3 = paging_structures_base;
    // Set up the segment registers. We can set the cached register values
    // directly, no need for GDT, far jumps or the rest of this nonsense.
    special_registers.gdt.base = 0;
    special_registers.gdt.limit = 0;
   
    special_registers.cs.selector = 0x08;
    special_registers.cs.base = 0;
    special_registers.cs.limit = 0;
    special_registers.cs.type = 0b1011;
    special_registers.cs.present = 1;
    special_registers.cs.dpl = 0;
    special_registers.cs.db = 0;
    special_registers.cs.s = 1;
    special_registers.cs.l = 1;
    special_registers.cs.g = 0;
    special_registers.cs.unusable = 0;
   
    special_registers.ds.selector = 0x10;
    special_registers.ds.base = 0;
    special_registers.ds.limit = 0;
    special_registers.ds.type = 0b0011;
    special_registers.ds.dpl = 0;
    special_registers.ds.db = 0;
    special_registers.ds.s = 1;
    special_registers.ds.l = 0;
    special_registers.ds.g = 0;
    special_registers.ds.unusable = 0;
    special_registers.es = special_registers.ds;
    special_registers.fs = special_registers.ds;
    special_registers.gs = special_registers.ds;
    special_registers.ss = special_registers.ds;
    // Set up an empty IDT. This will turn any exception into a triple fault,
    // but we don't care.
    special_registers.idt.base = 0;
    special_registers.idt.limit = 0;
   
    cpu.set_special_registers( special_registers );
}

// Labels marking the first byte of the guest program and the byte just past
// its end; both are defined by the assembly block below.
extern "C" unsigned char vm_code_begin;
extern "C" unsigned char vm_code_end;

// Guest program, assembled into its own read-only section of the host binary.
// .pushsection/.popsection restore whatever section was active before this
// block, so the code the compiler emits around it is not affected by the
// section switch.
asm(
".pushsection .rodata.vm_code,\"aG\",@progbits,vm_code\n"
"vm_code_begin:\n"
"push %rdi\n"
"mov %cr2, %rcx\n"
"movabs $0x123456789ABCDEF0, %rax\n"
"hlt\n"
"vm_code_end:\n"
".popsection\n"
);

// Copies the guest program (the bytes between vm_code_begin and vm_code_end)
// to program_base in `guest_memory` and sets the initial general-purpose
// registers of `cpu`. Propagates kvm_error from set_registers().
void load_program( kvm_cpu & cpu, memory_block & guest_memory )
{
    const unsigned char * const code_first = & vm_code_begin;
    const unsigned char * const code_past_last = & vm_code_end;
    const auto code_size = static_cast < std::size_t >( code_past_last - code_first );
    guest_memory.store_bytes( program_base, code_first, code_size );

    // Every register not listed below starts as zero.
    kvm_regs initial_state = { };
    initial_state.rip = program_base;
    initial_state.rsp = program_base + 0x1000;
    initial_state.rflags = 0x02;
    // Differs from the value run_loop() expects, so the check there only
    // passes if the guest code actually overwrote rax.
    initial_state.rax = 0xDEADBEEF;
    cpu.set_registers( initial_state );
}

void run_loop( kvm_cpu & cpu, kvm_cpu_control_block & control_block )
{
    for(;; ) {
        cpu.run();
        const auto exit_reason = control_block->exit_reason;
        switch( exit_reason ) {
        case KVM_EXIT_HLT: {
                const auto result = cpu.get_registers().rax;
                if( result == 0x123456789ABCDEF0 ) {
                    return;
                } else {
                    throw std::runtime_error(
                    "Unexpected result: " + std::to_string( result ) );
                }
            }
            default:
            throw std::runtime_error(
            "Unknown exit reason: " + std::to_string( exit_reason ) );
        }
    }
}

int main()
{
    try {
        kvm_control_device control_device;
        auto machine = control_device.create_machine();
        memory_block guest_memory( 1 * 1024 * 1024 );
        machine.set_memory_region( 0, 0x0, guest_memory );
        auto cpu = machine.create_cpu( 0 );
        auto cpu_control_block = cpu.get_control_block( control_device );
        setup_long_mode( cpu, guest_memory );
        load_program( cpu, guest_memory );
        run_loop( cpu, cpu_control_block );
        return 0;
    } catch( const kvm_version_error & e ) {
        std::clog << e.what() << '\n';
    } catch( const kvm_error & e ) {
        std::clog << e.what() << ": " << strerror( e.code() ) << '\n';
    } catch( const std::exception & e ) {
        std::clog << e.what() << '\n';
    }
    return 1;
}
P-160501
« 1 »
  Strona 1 z 1