Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub enum FunctionCallType {
}

/// `FunctionCall` represents a call to a function in the guest or host.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct FunctionCall {
/// The function name
pub function_name: String,
Expand Down
4 changes: 2 additions & 2 deletions src/hyperlight_common/src/vmem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ pub struct Mapping {
/// are being remapped, TLB invalidation may need to be performed
/// afterwards.
pub use arch::map;
/// This function is not presently used for anything, but is useful
/// for debugging
/// This function is presently used for reading the tracing data; it is
/// also useful for debugging.
///
/// # Safety
/// This function traverses page table data structures, and should not
Expand Down
6 changes: 3 additions & 3 deletions src/hyperlight_guest/src/exit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ pub(crate) unsafe fn out32(port: u16, val: u32) {
asm!("out dx, eax",
in("dx") port,
in("eax") val,
in("r8") OutBAction::TraceBatch as u64,
in("r9") ptr,
in("r10") len,
in("r12") OutBAction::TraceBatch as u64,
in("r13") ptr,
in("r14") len,
options(preserves_flags, nomem, nostack)
)
};
Expand Down
32 changes: 23 additions & 9 deletions src/hyperlight_guest_bin/src/guest_function/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,23 +81,25 @@ pub(crate) fn call_guest_function(function_call: FunctionCall) -> Result<Vec<u8>
pub(crate) fn internal_dispatch_function() {
// Read the current TSC to report it to the host with the spans/events
// This helps calculating the timestamps relative to the guest call
#[cfg(feature = "trace_guest")]
{
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
let _entered = {
let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
// Reset the trace state for the new guest function call with the new start TSC
// This clears any existing spans/events from previous calls ensuring a clean state
hyperlight_guest_tracing::new_call(guest_start_tsc);
}

let handle = unsafe { GUEST_HANDLE };
tracing::span!(tracing::Level::INFO, "internal_dispatch_function").entered()
};

#[cfg(debug_assertions)]
log::trace!("internal_dispatch_function");
let handle = unsafe { GUEST_HANDLE };

let function_call = handle
.try_pop_shared_input_data_into::<FunctionCall>()
.expect("Function call deserialization failed");

#[cfg(debug_assertions)]
tracing::trace!("{:?}", function_call);

let res = call_guest_function(function_call);

match res {
Expand All @@ -117,8 +119,20 @@ pub(crate) fn internal_dispatch_function() {
}
}

// Ensure that any tracing output during the call is flushed to
// the host, if necessary.
// All this tracing logic shall be done right before the call to `hlt` which is done after this
// function returns
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
hyperlight_guest_tracing::flush();
{
// This span captures the internal dispatch function only, without tracing internals.
// Close the span before flushing to ensure that the `flush` call is not included in the span
// NOTE: This is necessary to avoid closing the span twice. Flush closes all the open
// spans, when preparing to close a guest function call context.
// It is not mandatory, but it avoids a warning on the host about a span
// being closed that was never opened.
_entered.exit();

// Ensure that any tracing output during the call is flushed to
// the host, if necessary.
hyperlight_guest_tracing::flush();
}
Comment on lines +126 to +137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be factored out to not be duplicated here and in lib.rs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really, we need flush to be right before the call to hlt so that no other spans/logs are added.

}
24 changes: 21 additions & 3 deletions src/hyperlight_guest_bin/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ pub(crate) extern "C" fn generic_init(
hyperlight_guest_tracing::init_guest_tracing(guest_start_tsc);
}

// Open a span to partly capture the initialization of the guest.
// This is done here because the tracing subscriber is initialized and the guest is in a
// well-known state
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
let _entered = tracing::span!(tracing::Level::INFO, "generic_init").entered();

#[cfg(feature = "macros")]
for registration in __private::GUEST_FUNCTION_INIT {
registration();
Expand All @@ -235,10 +241,22 @@ pub(crate) extern "C" fn generic_init(
hyperlight_main();
}

// Ensure that any tracing output from the initialisation phase is
// flushed to the host, if necessary.
// All this tracing logic shall be done right before the call to `hlt` which is done after this
// function returns
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
hyperlight_guest_tracing::flush();
{
// This span captures the generic_init function only, without tracing internals.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment shoud say generic_init as opposed to internal dispatch function maybe?

// Close the span before flushing to ensure that the `flush` call is not included in the span
// NOTE: This is necessary to avoid closing the span twice. Flush closes all the open
// spans, when preparing to close a guest function call context.
// It is not mandatory, but it avoids a warning on the host about a span
// being closed that was never opened.
_entered.exit();

// Ensure that any tracing output from the initialisation phase is
// flushed to the host, if necessary.
hyperlight_guest_tracing::flush();
}

dispatch_function as usize as u64
}
Expand Down
6 changes: 3 additions & 3 deletions src/hyperlight_guest_tracing/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ fn send_to_host(data: &[u8]) {
in("dx") OutBAction::TraceBatch as u16,
in("al") 0u8,
// Additional magic number to identify the action
in("r8") OutBAction::TraceBatch as u64,
in("r9") data.as_ptr() as u64,
in("r10") data.len() as u64,
in("r12") OutBAction::TraceBatch as u64,
in("r13") data.as_ptr() as u64,
in("r14") data.len() as u64,
);
}
}
Expand Down
26 changes: 20 additions & 6 deletions src/hyperlight_host/src/hypervisor/hyperlight_vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,12 +752,26 @@ impl HyperlightVm {
#[cfg(feature = "trace_guest")]
{
tc.end_host_trace();
// Handle the guest trace data if any
let regs = self.vm.regs().map_err(RunVmError::GetRegs)?;
if let Err(e) = tc.handle_trace(&regs, mem_mgr) {
// If no trace data is available, we just log a message and continue
// Is this the right thing to do?
log::debug!("Error handling guest trace: {:?}", e);
match result.as_ref() {
// On the Halt exit we don't receive any trace information
Ok(VmExit::Halt()) => {}
_ => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could be simplified slightly to something like

if !matches!(result.as_ref(), Ok(VmExit::Halt())) {
    let regs = self.vm.regs().map_err(RunVmError::GetRegs)?;
    if let Ok(cr3) = self.vm.sregs().map(|s| s.cr3) {
        tc.handle_trace(&regs, mem_mgr, cr3).unwrap_or_else(|e| {
            tracing::error!("Cannot handle trace data: {}", e);
        });
    } else {
        tracing::error!("Failed to get sregs for trace handling");
    }
}

// Handle the guest trace data if any
let regs = self.vm.regs().map_err(RunVmError::GetRegs)?;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we know the outb action ( in("r12") OutBAction::TraceBatch as u64), could we skip doing this for every instance with something like:

     
if regs.r12 == OutBAction::TraceBatch then
 do all the tracing stuff here

// If something goes wrong with parsing the trace data, we log the error and
// continue execution instead of returning an error since this is not critical
// to correct execution of the guest
match self.vm.sregs().map(|s| s.cr3) {
Ok(cr3) => {
tc.handle_trace(&regs, mem_mgr, cr3).unwrap_or_else(|e| {
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

virt_to_phys expects root_pt to be a page-aligned PML4 base, but the value passed here comes directly from sregs.cr3 and may include low flag/PCID bits. The guest-side root_table() masks with & !0xfff; the host should do the same (or use a shared helper) before walking page tables, otherwise translation can read the wrong entries.

Suggested change
tc.handle_trace(&regs, mem_mgr, cr3).unwrap_or_else(|e| {
let root_pt = cr3 & !0xfff_u64;
tc.handle_trace(&regs, mem_mgr, root_pt).unwrap_or_else(|e| {

Copilot uses AI. Check for mistakes.
tracing::error!("Cannot handle trace data: {}", e);
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This path logs Cannot handle trace data at error level for any tc.handle_trace failure. Since handle_trace returns an error when the trace magic isn’t present, this can become noisy on VM exits that don’t carry trace batches. Consider either gating the call on regs.r12 == OutBAction::TraceBatch as u64 or downgrading the expected "no trace batch" case to debug/trace logging.

Suggested change
tracing::error!("Cannot handle trace data: {}", e);
tracing::debug!("Cannot handle trace data: {}", e);

Copilot uses AI. Check for mistakes.
})
}
Err(e) => {
tracing::error!("Failed to get sregs for trace handling: {e}");
}
}
}
}
}
result
Expand Down
96 changes: 96 additions & 0 deletions src/hyperlight_host/src/mem/mgr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,102 @@ impl SandboxMemoryManager<HostSharedMemory> {

Ok(())
}

/// Read guest memory at a Guest Virtual Address (GVA) by walking the
/// page tables to translate GVA → GPA, then reading from the correct
/// backing memory (shared_mem or scratch_mem).
///
/// This is necessary because with Copy-on-Write (CoW) the guest's
/// virtual pages may be backed by physical pages in the scratch
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo "are be"

/// region rather than being identity-mapped.
///
/// # Arguments
/// * `gva` - The Guest Virtual Address to read from
/// * `len` - The number of bytes to read
/// * `root_pt` - The root page table physical address (CR3)
#[cfg(feature = "trace_guest")]
pub(crate) fn read_guest_memory_by_gva(
&mut self,
gva: u64,
len: usize,
root_pt: u64,
) -> Result<Vec<u8>> {
use hyperlight_common::vmem::PAGE_SIZE;

use crate::sandbox::snapshot::{SharedMemoryPageTableBuffer, access_gpa};

let scratch_size = self.scratch_mem.mem_size();

self.shared_mem.with_exclusivity(|snap| {
self.scratch_mem.with_exclusivity(|scratch| {
let pt_buf = SharedMemoryPageTableBuffer::new(snap, scratch, scratch_size, root_pt);

// Walk page tables to get all mappings that cover the GVA range
let mappings: Vec<_> = unsafe {
hyperlight_common::vmem::virt_to_phys(&pt_buf, gva, len as u64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This length isn't necessarily aligned to a page I think?

This isn't in your code, but I think this might expose an issue where if this length goes past one page, it could truncate the result and not give you the right mappings. We are missing a unit test that covers that scenario in vmem.

You could do len as u64 + page_offset but we should probably fix it in vmem so that we don't accidently mis use this function again

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add some tests for this

}
.collect();

if mappings.is_empty() {
return Err(new_error!(
"No page table mappings found for GVA {:#x} (len {})",
gva,
len,
));
}

let mut result = Vec::with_capacity(len);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this length controlled by the guest? coming from context.rs:

let trace_data_len = regs.r14 as usize;
        let buf = mem_mgr.read_guest_memory_by_gva(trace_data_gva, trace_data_len, root_pt)?;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice this was caught in #1260 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very good point, we need to have some max size we allow for sure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, I think I'll limit it at 4K for now, but in the future we'd be able to shrink it because we would use the virtio queues and we won't care about this. We could even not batch them at all, the queue would do that for us


Comment on lines +428 to +459
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read_guest_memory_by_gva allocates Vec::with_capacity(len) using a length provided by the guest (r14). A malicious/buggy guest can request an arbitrarily large len and cause host OOM. Consider enforcing a strict upper bound (e.g., a max trace batch size) before allocating, and return an error if exceeded.

Copilot uses AI. Check for mistakes.
for mapping in &mappings {
// Calculate the offset within this page for the first byte we want
let page_offset = if mapping.virt_base <= gva {
(gva - mapping.virt_base) as usize
} else {
0
};

let bytes_remaining = len - result.len();
let available_in_page = PAGE_SIZE - page_offset;
let bytes_to_copy = bytes_remaining.min(available_in_page);

// Translate the GPA to host memory
let gpa = mapping.phys_base + page_offset as u64;
let (mem, offset) = access_gpa(snap, scratch, scratch_size, gpa)
.ok_or_else(|| {
new_error!(
"Failed to resolve GPA {:#x} to host memory (GVA {:#x})",
gpa,
gva
)
})?;

let slice = mem
.as_slice()
.get(offset..offset + bytes_to_copy)
.ok_or_else(|| {
new_error!(
"GPA {:#x} resolved to out-of-bounds host offset {} (need {} bytes)",
gpa,
offset,
bytes_to_copy
)
})?;

result.extend_from_slice(slice);
}

if result.len() != len {
return Err(new_error!(
"Could not read full GVA range: got {} of {} bytes",
result.len(),
len
));
}

Ok(result)
})
Comment on lines +415 to +507
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New page-table-walk-based trace reading logic is introduced here, but there’s no unit test coverage validating cross-page reads, unmapped gaps, and scratch-vs-snapshot backing resolution. Since this file already has paging-related tests, consider adding tests (under appropriate feature gating) that exercise read_guest_memory_by_gva for both identity-mapped and CoW/scratch-backed pages.

Copilot generated this review using guidance from repository custom instructions.
})??
}
}

#[cfg(test)]
Expand Down
4 changes: 2 additions & 2 deletions src/hyperlight_host/src/sandbox/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn hash(memory: &[u8], regions: &[MemoryRegion]) -> Result<[u8; 32]> {
Ok(hasher.finalize().into())
}

fn access_gpa<'a>(
pub(crate) fn access_gpa<'a>(
snap: &'a ExclusiveSharedMemory,
scratch: &'a ExclusiveSharedMemory,
scratch_size: usize,
Expand All @@ -197,7 +197,7 @@ pub(crate) struct SharedMemoryPageTableBuffer<'a> {
root: u64,
}
impl<'a> SharedMemoryPageTableBuffer<'a> {
fn new(
pub(crate) fn new(
snap: &'a ExclusiveSharedMemory,
scratch: &'a ExclusiveSharedMemory,
scratch_size: usize,
Expand Down
Loading