-
Notifications
You must be signed in to change notification settings - Fork 161
Fix tracing cow changes #1260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Fix tracing cow changes #1260
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -226,6 +226,12 @@ pub(crate) extern "C" fn generic_init( | |
| hyperlight_guest_tracing::init_guest_tracing(guest_start_tsc); | ||
| } | ||
|
|
||
| // Open a span to partly capture the initialization of the guest. | ||
| // This is done here because the tracing subscriber is initialized and the guest is in a | ||
| // well-known state | ||
| #[cfg(all(feature = "trace_guest", target_arch = "x86_64"))] | ||
| let _entered = tracing::span!(tracing::Level::INFO, "generic_init").entered(); | ||
|
|
||
| #[cfg(feature = "macros")] | ||
| for registration in __private::GUEST_FUNCTION_INIT { | ||
| registration(); | ||
|
|
@@ -235,10 +241,22 @@ pub(crate) extern "C" fn generic_init( | |
| hyperlight_main(); | ||
| } | ||
|
|
||
| // Ensure that any tracing output from the initialisation phase is | ||
| // flushed to the host, if necessary. | ||
| // All this tracing logic shall be done right before the call to `hlt` which is done after this | ||
| // function returns | ||
| #[cfg(all(feature = "trace_guest", target_arch = "x86_64"))] | ||
| hyperlight_guest_tracing::flush(); | ||
| { | ||
| // This span captures the internal dispatch function only, without tracing internals. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this comment should say generic_init as opposed to internal dispatch function maybe? |
||
| // Close the span before flushing to ensure that the `flush` call is not included in the span | ||
| // NOTE: This is necessary to avoid closing the span twice. Flush closes all the open | ||
| // spans, when preparing to close a guest function call context. | ||
| // It is not mandatory, though, but avoids a warning on the host that alerts a span | ||
| // that has not been opened but is being closed. | ||
| _entered.exit(); | ||
|
|
||
| // Ensure that any tracing output from the initialisation phase is | ||
| // flushed to the host, if necessary. | ||
| hyperlight_guest_tracing::flush(); | ||
| } | ||
|
|
||
| dispatch_function as usize as u64 | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -752,12 +752,26 @@ impl HyperlightVm { | |||||||
| #[cfg(feature = "trace_guest")] | ||||||||
| { | ||||||||
| tc.end_host_trace(); | ||||||||
| // Handle the guest trace data if any | ||||||||
| let regs = self.vm.regs().map_err(RunVmError::GetRegs)?; | ||||||||
| if let Err(e) = tc.handle_trace(®s, mem_mgr) { | ||||||||
| // If no trace data is available, we just log a message and continue | ||||||||
| // Is this the right thing to do? | ||||||||
| log::debug!("Error handling guest trace: {:?}", e); | ||||||||
| match result.as_ref() { | ||||||||
| // On the Halt exit we don't receive any trace information | ||||||||
| Ok(VmExit::Halt()) => {} | ||||||||
| _ => { | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: could be simplified slightly to something like if !matches!(result.as_ref(), Ok(VmExit::Halt())) {
let regs = self.vm.regs().map_err(RunVmError::GetRegs)?;
if let Ok(cr3) = self.vm.sregs().map(|s| s.cr3) {
tc.handle_trace(®s, mem_mgr, cr3).unwrap_or_else(|e| {
tracing::error!("Cannot handle trace data: {}", e);
});
} else {
tracing::error!("Failed to get sregs for trace handling");
}
} |
||||||||
| // Handle the guest trace data if any | ||||||||
| let regs = self.vm.regs().map_err(RunVmError::GetRegs)?; | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since we know the |
||||||||
| // If something goes wrong with parsing the trace data, we log the error and | ||||||||
| // continue execution instead of returning an error since this is not critical | ||||||||
| // to correct execution of the guest | ||||||||
| match self.vm.sregs().map(|s| s.cr3) { | ||||||||
| Ok(cr3) => { | ||||||||
| tc.handle_trace(®s, mem_mgr, cr3).unwrap_or_else(|e| { | ||||||||
|
||||||||
| tc.handle_trace(®s, mem_mgr, cr3).unwrap_or_else(|e| { | |
| let root_pt = cr3 & !0xfff_u64; | |
| tc.handle_trace(®s, mem_mgr, root_pt).unwrap_or_else(|e| { |
Copilot
AI
Feb 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This path logs Cannot handle trace data at error level for any tc.handle_trace failure. Since handle_trace returns an error when the trace magic isn’t present, this can become noisy on VM exits that don’t carry trace batches. Consider either gating the call on regs.r12 == OutBAction::TraceBatch as u64 or downgrading the expected "no trace batch" case to debug/trace logging.
| tracing::error!("Cannot handle trace data: {}", e); | |
| tracing::debug!("Cannot handle trace data: {}", e); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -411,6 +411,102 @@ impl SandboxMemoryManager<HostSharedMemory> { | |
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| /// Read guest memory at a Guest Virtual Address (GVA) by walking the | ||
| /// page tables to translate GVA → GPA, then reading from the correct | ||
| /// backing memory (shared_mem or scratch_mem). | ||
| /// | ||
| /// This is necessary because with Copy-on-Write (CoW) the guest's | ||
| /// virtual pages are backed by physical pages in the scratch | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo "are be" |
||
| /// region rather than being identity-mapped. | ||
| /// | ||
| /// # Arguments | ||
| /// * `gva` - The Guest Virtual Address to read from | ||
| /// * `len` - The number of bytes to read | ||
| /// * `root_pt` - The root page table physical address (CR3) | ||
| #[cfg(feature = "trace_guest")] | ||
| pub(crate) fn read_guest_memory_by_gva( | ||
| &mut self, | ||
| gva: u64, | ||
| len: usize, | ||
| root_pt: u64, | ||
| ) -> Result<Vec<u8>> { | ||
| use hyperlight_common::vmem::PAGE_SIZE; | ||
|
|
||
| use crate::sandbox::snapshot::{SharedMemoryPageTableBuffer, access_gpa}; | ||
|
|
||
| let scratch_size = self.scratch_mem.mem_size(); | ||
|
|
||
| self.shared_mem.with_exclusivity(|snap| { | ||
| self.scratch_mem.with_exclusivity(|scratch| { | ||
| let pt_buf = SharedMemoryPageTableBuffer::new(snap, scratch, scratch_size, root_pt); | ||
|
|
||
| // Walk page tables to get all mappings that cover the GVA range | ||
| let mappings: Vec<_> = unsafe { | ||
| hyperlight_common::vmem::virt_to_phys(&pt_buf, gva, len as u64) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This length isn't necessarily aligned to a page I think? This isn't in your code, but I think this might expose an issue where if this length goes past one page, it could truncate the result and not give you the right mappings. We are missing a unit test that covers that scenario in vmem. You could do
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll add some tests for this |
||
| } | ||
| .collect(); | ||
|
|
||
| if mappings.is_empty() { | ||
| return Err(new_error!( | ||
| "No page table mappings found for GVA {:#x} (len {})", | ||
| gva, | ||
| len, | ||
| )); | ||
| } | ||
|
|
||
| let mut result = Vec::with_capacity(len); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this length controlled by the guest? coming from context.rs:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice this was caught in #1260 (comment)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. very good point, we need to have some max size we allow for sure
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I think I'll limit it at 4K for now, but in the future we'd be able to shrink it because we would use the virtio queues and we won't care about this. We could even not batch them at all, the queue would do that for us |
||
|
|
||
|
Comment on lines
+428
to
+459
|
||
| for mapping in &mappings { | ||
| // Calculate the offset within this page for the first byte we want | ||
| let page_offset = if mapping.virt_base <= gva { | ||
| (gva - mapping.virt_base) as usize | ||
| } else { | ||
| 0 | ||
| }; | ||
|
|
||
| let bytes_remaining = len - result.len(); | ||
| let available_in_page = PAGE_SIZE - page_offset; | ||
| let bytes_to_copy = bytes_remaining.min(available_in_page); | ||
|
|
||
| // Translate the GPA to host memory | ||
| let gpa = mapping.phys_base + page_offset as u64; | ||
| let (mem, offset) = access_gpa(snap, scratch, scratch_size, gpa) | ||
| .ok_or_else(|| { | ||
| new_error!( | ||
| "Failed to resolve GPA {:#x} to host memory (GVA {:#x})", | ||
| gpa, | ||
| gva | ||
| ) | ||
| })?; | ||
|
|
||
| let slice = mem | ||
| .as_slice() | ||
| .get(offset..offset + bytes_to_copy) | ||
| .ok_or_else(|| { | ||
| new_error!( | ||
| "GPA {:#x} resolved to out-of-bounds host offset {} (need {} bytes)", | ||
| gpa, | ||
| offset, | ||
| bytes_to_copy | ||
| ) | ||
| })?; | ||
|
|
||
| result.extend_from_slice(slice); | ||
| } | ||
|
|
||
| if result.len() != len { | ||
| return Err(new_error!( | ||
| "Could not read full GVA range: got {} of {} bytes", | ||
| result.len(), | ||
| len | ||
| )); | ||
| } | ||
|
|
||
| Ok(result) | ||
| }) | ||
|
Comment on lines
+415
to
+507
|
||
| })?? | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be factored out to not be duplicated here and in lib.rs?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not really, we need
flushto be right before the call tohltso that no other spans/logs are added.