From 83b5209f0d63e77447a24b29f42179b6ab6d874d Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Sat, 24 May 2025 13:06:02 +0200 Subject: [PATCH 1/8] Implement filtering on encoding picker --- src/bin/edit/draw_statusbar.rs | 65 +++++++++++++++++++++++++++------- src/bin/edit/state.rs | 4 +++ src/lib.rs | 1 + 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index 0eba7de36245..aad5fc5cf6f7 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -2,6 +2,7 @@ // Licensed under the MIT License. use edit::framebuffer::{Attributes, IndexedColor}; +use edit::fuzzy::score_fuzzy; use edit::helpers::*; use edit::input::vk; use edit::tui::*; @@ -205,25 +206,63 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { if reopen { loc(LocId::EncodingReopen) } else { loc(LocId::EncodingConvert) }, ); { - ctx.scrollarea_begin("scrollarea", Size { width, height }); - ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); - ctx.inherit_focus(); + ctx.table_begin("encoding-picker"); + ctx.table_set_columns(&[0]); + { - let encodings = icu::get_available_encodings(); + ctx.table_next_row(); - ctx.list_begin("encodings"); + ctx.scrollarea_begin("scrollarea", Size { width, height }); + ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); ctx.inherit_focus(); - for &encoding in encodings { - if ctx.list_item(encoding == doc.buffer.borrow().encoding(), encoding) - == ListSelection::Activated - { - change = Some(encoding); - break; + ctx.focus_on_first_present(); + { + let encodings = icu::get_available_encodings() + .iter() + .filter_map(|&enc| { + if state.encoding_picker_needle.is_empty() { + return Some(enc); + } + + let (score, _) = + score_fuzzy(ctx.arena(), enc, &state.encoding_picker_needle, true); + + if score > 0 { Some(enc) } else { None } + }) + .collect::>(); + + ctx.list_begin("encodings"); + ctx.inherit_focus(); + + for encoding in encodings { + if ctx.list_item(encoding == doc.buffer.borrow().encoding(), encoding) + == ListSelection::Activated + { + change = Some(encoding); + break; + } } + ctx.list_end(); } - ctx.list_end(); + ctx.scrollarea_end(); } - ctx.scrollarea_end(); + + { + ctx.table_next_row(); + + ctx.table_begin("encoding-search"); + ctx.table_set_columns(&[0, width]); + + ctx.table_next_row(); + + ctx.label("needle-label", loc(LocId::SearchNeedleLabel)); + ctx.editline("needle", &mut state.encoding_picker_needle); + ctx.inherit_focus(); + + ctx.table_end(); + } + + ctx.table_end(); } if ctx.modal_end() { state.wants_encoding_change = StateEncodingChange::None; diff --git a/src/bin/edit/state.rs b/src/bin/edit/state.rs index bc8ba87d5e34..e0bb14fc718d 100644 --- a/src/bin/edit/state.rs +++ b/src/bin/edit/state.rs @@ -159,6 +159,8 @@ pub struct State { pub osc_clipboard_send_generation: u32, pub osc_clipboard_always_send: bool, pub exit: bool, + + pub encoding_picker_needle: String, } impl State { @@ -203,6 +205,8 @@ impl State { osc_clipboard_send_generation: 0, osc_clipboard_always_send: false, exit: false, + + encoding_picker_needle: Default::default(), }) } } diff --git a/src/lib.rs b/src/lib.rs index e0e5aeba58a7..24184a58a1ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ pub mod buffer; pub mod cell; pub mod document; pub mod framebuffer; +pub mod fuzzy; pub mod hash; pub mod helpers; pub mod icu; From 196de5a22372b977d24ff488b25fb05365270e76 Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Sat, 24 May 2025 15:57:46 +0200 Subject: [PATCH 2/8] Handle encoding aliases --- src/bin/edit/draw_statusbar.rs | 45 ++++++++++++++++++++----- src/icu.rs | 60 ++++++++++++++++++++++++++++------ 2 files changed, 86 insertions(+), 19 deletions(-) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index aad5fc5cf6f7..8a113151cefc 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -4,6 +4,7 @@ use edit::framebuffer::{Attributes, IndexedColor}; use edit::fuzzy::score_fuzzy; use edit::helpers::*; +use edit::icu::EncodingInfo; use edit::input::vk; use edit::tui::*; use edit::{arena_format, icu}; @@ -219,26 +220,52 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { { let encodings = icu::get_available_encodings() .iter() - .filter_map(|&enc| { + .filter_map(|enc| { if state.encoding_picker_needle.is_empty() { return Some(enc); } - let (score, _) = - score_fuzzy(ctx.arena(), enc, &state.encoding_picker_needle, true); + let (name_score, _) = + score_fuzzy(ctx.arena(), enc.name, &state.encoding_picker_needle, true); - if score > 0 { Some(enc) } else { None } + if name_score > 0 { + return Some(enc); + } + + let alias_matches = enc.aliases.iter().any(|alias| { + let (alias_score, _) = score_fuzzy( + ctx.arena(), + alias, + &state.encoding_picker_needle, + true, + ); + alias_score > 0 + }); + + if alias_matches { Some(enc) } else { None } }) - .collect::>(); + .collect::>(); ctx.list_begin("encodings"); ctx.inherit_focus(); - + for encoding in encodings { - if ctx.list_item(encoding == doc.buffer.borrow().encoding(), encoding) + let label = if encoding.aliases.is_empty() { + encoding.name + } else { + let aliases = encoding + .aliases + .iter() + .map(|alias| format!("\"{}\"", alias)) + .collect::>() + .join(", "); + &format!("{} ({})", encoding.name, aliases) + }; + + if ctx.list_item(encoding.name == doc.buffer.borrow().encoding(), label) == ListSelection::Activated { - change = Some(encoding); + change = Some(encoding.name); break; } } @@ -261,7 +288,7 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { ctx.table_end(); } - + ctx.table_end(); } if ctx.modal_end() { diff --git a/src/icu.rs b/src/icu.rs index d8f36a7d084d..8120a45694a3 100644 --- a/src/icu.rs +++ b/src/icu.rs @@ -4,7 +4,7 @@ //! Bindings to the ICU library. use std::cmp::Ordering; -use std::ffi::CStr; +use std::ffi::{CStr, c_char}; use std::mem; use std::mem::MaybeUninit; use std::ops::Range; @@ -15,40 +15,67 @@ use crate::buffer::TextBuffer; use crate::unicode::Utf8Chars; use crate::{apperr, arena_format, sys}; -static mut ENCODINGS: Vec<&'static str> = Vec::new(); +static mut ENCODINGS: Vec = Vec::new(); + +pub struct EncodingInfo { + pub name: &'static str, + pub aliases: Vec<&'static str>, +} /// Returns a list of encodings ICU supports. -pub fn get_available_encodings() -> &'static [&'static str] { +pub fn get_available_encodings() -> &'static Vec { // OnceCell for people that want to put it into a static. #[allow(static_mut_refs)] unsafe { if ENCODINGS.is_empty() { - ENCODINGS.push("UTF-8"); - ENCODINGS.push("UTF-8 BOM"); + ENCODINGS.push(EncodingInfo { name: "UTF-8", aliases: vec![] }); + ENCODINGS.push(EncodingInfo { name: "UTF-8-BOM", aliases: vec![] }); if let Ok(f) = init_if_needed() { let mut n = 0; loop { - let name = (f.ucnv_getAvailableName)(n); - if name.is_null() { + let c_name = (f.ucnv_getAvailableName)(n); + if c_name.is_null() { break; } - let name = CStr::from_ptr(name).to_str().unwrap_unchecked(); + let name = CStr::from_ptr(c_name).to_str().unwrap_unchecked(); // We have already pushed UTF-8 above. // There is no need to filter UTF-8 BOM here, since ICU does not distinguish it from UTF-8. if name != "UTF-8" { - ENCODINGS.push(name); + ENCODINGS.push(EncodingInfo { name, aliases: get_aliases(c_name, f) }) } n += 1; } } } + &ENCODINGS } } +fn get_aliases(name: *const c_char, f: &LibraryFunctions) -> Vec<&'static str> { + let mut status = icu_ffi::U_ZERO_ERROR; + let alias_count = unsafe { (f.ucnv_countAliases)(name, &mut status) }; + if status.is_failure() { + return vec![]; + } + + let mut aliases: Vec<*mut c_char> = vec![null_mut(); alias_count as usize]; + + status = icu_ffi::U_ZERO_ERROR; + unsafe { (f.ucnv_getAliases)(name, aliases.as_mut_ptr(), &mut status) }; + if status.is_failure() { + return vec![]; + } + + aliases + .iter() + .map(|alias| unsafe { CStr::from_ptr(*alias).to_str().unwrap_unchecked() }) + .collect() +} + /// Formats the given ICU error code into a human-readable string. pub fn apperr_format(f: &mut std::fmt::Formatter<'_>, code: u32) -> std::fmt::Result { fn format(code: u32) -> &'static str { @@ -830,6 +857,8 @@ struct LibraryFunctions { // LIBICUUC_PROC_NAMES u_errorName: icu_ffi::u_errorName, ucnv_getAvailableName: icu_ffi::ucnv_getAvailableName, + ucnv_countAliases: icu_ffi::ucnv_countAliases, + ucnv_getAliases: icu_ffi::ucnv_getAliases, ucnv_open: icu_ffi::ucnv_open, ucnv_close: icu_ffi::ucnv_close, ucnv_convertEx: icu_ffi::ucnv_convertEx, @@ -851,10 +880,12 @@ struct LibraryFunctions { ucol_strcollUTF8: icu_ffi::ucol_strcollUTF8, } -const LIBICUUC_PROC_NAMES: [&CStr; 9] = [ +const LIBICUUC_PROC_NAMES: [&CStr; 11] = [ // Found in libicuuc.so on UNIX, icuuc.dll/icu.dll on Windows. c"u_errorName", c"ucnv_getAvailableName", + c"ucnv_countAliases", + c"ucnv_getAliases", c"ucnv_open", c"ucnv_close", c"ucnv_convertEx", @@ -1017,6 +1048,15 @@ mod icu_ffi { pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *mut c_char; + pub type ucnv_countAliases = + unsafe extern "C" fn(name: *const c_char, status: &mut UErrorCode) -> u16; + + pub type ucnv_getAliases = unsafe extern "C" fn( + name: *const c_char, + aliases: *mut *mut c_char, + status: &mut UErrorCode, + ); + pub type ucnv_open = unsafe extern "C" fn(converter_name: *const u8, status: &mut UErrorCode) -> *mut UConverter; From 424f79d6bccccb3ed9a5ad0dbfc6f2fef3fd19d2 Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Sat, 24 May 2025 16:03:06 +0200 Subject: [PATCH 3/8] Improve alias handling and encoding picker reset logic. --- src/bin/edit/draw_statusbar.rs | 2 ++ src/icu.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index 8a113151cefc..1a498328a71c 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -268,6 +268,7 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { change = Some(encoding.name); break; } + ctx.attr_overflow(Overflow::TruncateTail); } ctx.list_end(); } @@ -293,6 +294,7 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { } if ctx.modal_end() { state.wants_encoding_change = StateEncodingChange::None; + state.encoding_picker_needle.clear(); } if let Some(encoding) = change { diff --git a/src/icu.rs b/src/icu.rs index 8120a45694a3..a06b829fade9 100644 --- a/src/icu.rs +++ b/src/icu.rs @@ -72,6 +72,7 @@ fn get_aliases(name: *const c_char, f: &LibraryFunctions) -> Vec<&'static str> { aliases .iter() + .skip(1) // The first alias is the name itself. .map(|alias| unsafe { CStr::from_ptr(*alias).to_str().unwrap_unchecked() }) .collect() } From 2ec8a28e1b2cd1e630a7a585bc20158bc5957c26 Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Sat, 24 May 2025 16:06:46 +0200 Subject: [PATCH 4/8] Clear the needle when updating encoding --- src/bin/edit/draw_statusbar.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index 1a498328a71c..135226955246 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -313,6 +313,7 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { doc.buffer.borrow_mut().set_encoding(encoding); } + state.encoding_picker_needle.clear(); state.wants_encoding_change = StateEncodingChange::None; ctx.needs_rerender(); } From cb78475357b5c72e5a203a562e71021c576321d8 Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Wed, 28 May 2025 21:04:30 +0200 Subject: [PATCH 5/8] use proper name for UTF-8 BOM variant --- src/icu.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/icu.rs b/src/icu.rs index a06b829fade9..5d987f04c0e3 100644 --- a/src/icu.rs +++ b/src/icu.rs @@ -29,7 +29,7 @@ pub fn get_available_encodings() -> &'static Vec { unsafe { if ENCODINGS.is_empty() { ENCODINGS.push(EncodingInfo { name: "UTF-8", aliases: vec![] }); - ENCODINGS.push(EncodingInfo { name: "UTF-8-BOM", aliases: vec![] }); + ENCODINGS.push(EncodingInfo { name: "UTF-8 BOM", aliases: vec![] }); if let Ok(f) = init_if_needed() { let mut n = 0; From 271264c79c771530bdd6243dad2fd5da19f0948d Mon Sep 17 00:00:00 2001 From: Maciej Bartczak <39600846+maciekbartczak@users.noreply.github.com> Date: Wed, 28 May 2025 21:04:49 +0200 Subject: [PATCH 6/8] simplify encoding-picker layout by removing unnecessary table structures --- src/bin/edit/draw_statusbar.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index 135226955246..5327210c6515 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -207,12 +207,7 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { if reopen { loc(LocId::EncodingReopen) } else { loc(LocId::EncodingConvert) }, ); { - ctx.table_begin("encoding-picker"); - ctx.table_set_columns(&[0]); - { - ctx.table_next_row(); - ctx.scrollarea_begin("scrollarea", Size { width, height }); ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); ctx.inherit_focus(); @@ -276,8 +271,6 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { } { - ctx.table_next_row(); - ctx.table_begin("encoding-search"); ctx.table_set_columns(&[0, width]); @@ -289,8 +282,6 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { ctx.table_end(); } - - ctx.table_end(); } if ctx.modal_end() { state.wants_encoding_change = StateEncodingChange::None; From 206002ff2a8555c6c9e48b3b3667a3db741e943c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 5 Jun 2025 18:49:31 +0200 Subject: [PATCH 7/8] Prefer MIME names, Optimize performance --- src/bin/edit/draw_statusbar.rs | 139 ++++++++++++++++---------------- src/bin/edit/state.rs | 14 ++-- src/icu.rs | 140 +++++++++++++++++++-------------- 3 files changed, 154 insertions(+), 139 deletions(-) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index e4d2fbf005b9..f7e04c87e16c 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use edit::arena::scratch_arena; use edit::framebuffer::{Attributes, IndexedColor}; use edit::fuzzy::score_fuzzy; use edit::helpers::*; -use edit::icu::EncodingInfo; use edit::input::vk; use edit::tui::*; use edit::{arena_format, icu}; @@ -196,99 +196,62 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) { } pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { - let doc = state.documents.active_mut().unwrap(); + let encoding = state.documents.active_mut().map_or("", |doc| doc.buffer.borrow().encoding()); let reopen = state.wants_encoding_change == StateEncodingChange::Reopen; let width = (ctx.size().width - 20).max(10); let height = (ctx.size().height - 10).max(10); let mut change = None; + let mut done = encoding.is_empty(); ctx.modal_begin( "encode", if reopen { loc(LocId::EncodingReopen) } else { loc(LocId::EncodingConvert) }, ); { + ctx.table_begin("encoding-search"); + ctx.table_set_columns(&[0, COORD_TYPE_SAFE_MAX]); + ctx.table_set_cell_gap(Size { width: 1, height: 0 }); + ctx.inherit_focus(); { - ctx.scrollarea_begin("scrollarea", Size { width, height }); - ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); + ctx.table_next_row(); ctx.inherit_focus(); - ctx.focus_on_first_present(); - { - let encodings = icu::get_available_encodings() - .iter() - .filter_map(|enc| { - if state.encoding_picker_needle.is_empty() { - return Some(enc); - } - - let (name_score, _) = - score_fuzzy(ctx.arena(), enc.name, &state.encoding_picker_needle, true); - if name_score > 0 { - return Some(enc); - } + ctx.label("needle-label", loc(LocId::SearchNeedleLabel)); - let alias_matches = enc.aliases.iter().any(|alias| { - let (alias_score, _) = score_fuzzy( - ctx.arena(), - alias, - &state.encoding_picker_needle, - true, - ); - alias_score > 0 - }); - - if alias_matches { Some(enc) } else { None } - }) - .collect::>(); - - ctx.list_begin("encodings"); - ctx.inherit_focus(); - - for encoding in encodings { - let label = if encoding.aliases.is_empty() { - encoding.name - } else { - let aliases = encoding - .aliases - .iter() - .map(|alias| format!("\"{}\"", alias)) - .collect::>() - .join(", "); - &format!("{} ({})", encoding.name, aliases) - }; - - if ctx.list_item(encoding.name == doc.buffer.borrow().encoding(), label) - == ListSelection::Activated - { - change = Some(encoding.name); - break; - } - ctx.attr_overflow(Overflow::TruncateTail); - } - ctx.list_end(); + if ctx.editline("needle", &mut state.encoding_picker_needle) { + encoding_picker_update_list(state); } - ctx.scrollarea_end(); + ctx.inherit_focus(); } + ctx.table_end(); + ctx.scrollarea_begin("scrollarea", Size { width, height }); + ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); { - ctx.table_begin("encoding-search"); - ctx.table_set_columns(&[0, width]); - - ctx.table_next_row(); - - ctx.label("needle-label", loc(LocId::SearchNeedleLabel)); - ctx.editline("needle", &mut state.encoding_picker_needle); + ctx.list_begin("encodings"); ctx.inherit_focus(); - ctx.table_end(); + for enc in state + .encoding_picker_results + .as_deref() + .unwrap_or_else(|| icu::get_available_encodings().preferred) + { + if ctx.list_item(enc.canonical == encoding, enc.label) == ListSelection::Activated { + change = Some(enc.canonical); + break; + } + ctx.attr_overflow(Overflow::TruncateTail); + } + ctx.list_end(); } + ctx.scrollarea_end(); } - if ctx.modal_end() { - state.wants_encoding_change = StateEncodingChange::None; - state.encoding_picker_needle.clear(); - } + done |= ctx.modal_end(); + done |= change.is_some(); - if let Some(encoding) = change { + if let Some(encoding) = change + && let Some(doc) = state.documents.active_mut() + { if reopen && doc.path.is_some() { let mut res = Ok(()); if doc.buffer.borrow().is_dirty() { @@ -303,13 +266,45 @@ pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { } else { doc.buffer.borrow_mut().set_encoding(encoding); } + } - state.encoding_picker_needle.clear(); + if done { state.wants_encoding_change = StateEncodingChange::None; + state.encoding_picker_needle.clear(); + state.encoding_picker_results = None; ctx.needs_rerender(); } } +fn encoding_picker_update_list(state: &mut State) { + state.encoding_picker_results = None; + + let needle = state.encoding_picker_needle.trim_ascii(); + if needle.is_empty() { + return; + } + + let encodings = icu::get_available_encodings(); + let scratch = scratch_arena(None); + let mut matches = Vec::new_in(&*scratch); + + for enc in encodings.all { + let local_scratch = scratch_arena(Some(&scratch)); + let (score, _) = score_fuzzy(&local_scratch, enc.label, needle, true); + + if score > 0 { + matches.push((score, *enc)); + } + } + + if matches.is_empty() { + return; + } + + matches.sort_by(|a, b| b.0.cmp(&a.0)); + state.encoding_picker_results = Some(Vec::from_iter(matches.iter().map(|(_, enc)| *enc))); +} + pub fn draw_document_picker(ctx: &mut Context, state: &mut State) { ctx.modal_begin("document-picker", ""); { diff --git a/src/bin/edit/state.rs b/src/bin/edit/state.rs index bc41f8e5a1b9..ab02a2b6e24e 100644 --- a/src/bin/edit/state.rs +++ b/src/bin/edit/state.rs @@ -144,9 +144,12 @@ pub struct State { pub search_options: buffer::SearchOptions, pub search_success: bool, + pub wants_encoding_picker: bool, + pub encoding_picker_needle: String, + pub encoding_picker_results: Option>, + pub wants_save: bool, pub wants_statusbar_focus: bool, - pub wants_encoding_picker: bool, pub wants_encoding_change: StateEncodingChange, pub wants_indentation_picker: bool, pub wants_document_picker: bool, @@ -162,8 +165,6 @@ pub struct State { pub osc_clipboard_send_generation: u32, pub osc_clipboard_always_send: bool, pub exit: bool, - - pub encoding_picker_needle: String, } impl State { @@ -191,9 +192,12 @@ impl State { search_options: Default::default(), search_success: true, + wants_encoding_picker: false, + encoding_picker_needle: Default::default(), + encoding_picker_results: Default::default(), + wants_save: false, wants_statusbar_focus: false, - wants_encoding_picker: false, wants_encoding_change: StateEncodingChange::None, wants_indentation_picker: false, wants_document_picker: false, @@ -209,8 +213,6 @@ impl State { osc_clipboard_send_generation: 0, osc_clipboard_always_send: false, exit: false, - - encoding_picker_needle: Default::default(), }) } } diff --git a/src/icu.rs b/src/icu.rs index 2e0645869c25..c96c86b7a537 100644 --- a/src/icu.rs +++ b/src/icu.rs @@ -4,7 +4,7 @@ //! Bindings to the ICU library. use std::cmp::Ordering; -use std::ffi::{CStr, c_char}; +use std::ffi::CStr; use std::mem; use std::mem::MaybeUninit; use std::ops::Range; @@ -15,66 +15,80 @@ use crate::buffer::TextBuffer; use crate::unicode::Utf8Chars; use crate::{apperr, arena_format, sys}; -static mut ENCODINGS: Vec = Vec::new(); +#[derive(Clone, Copy)] +pub struct Encoding { + pub label: &'static str, + pub canonical: &'static str, +} -pub struct EncodingInfo { - pub name: &'static str, - pub aliases: Vec<&'static str>, +pub struct Encodings { + pub preferred: &'static [Encoding], + pub all: &'static [Encoding], } +static mut ENCODINGS: Encodings = Encodings { preferred: &[], all: &[] }; + /// Returns a list of encodings ICU supports. -pub fn get_available_encodings() -> &'static Vec { +pub fn get_available_encodings() -> &'static Encodings { // OnceCell for people that want to put it into a static. #[allow(static_mut_refs)] unsafe { - if ENCODINGS.is_empty() { - ENCODINGS.push(EncodingInfo { name: "UTF-8", aliases: vec![] }); - ENCODINGS.push(EncodingInfo { name: "UTF-8 BOM", aliases: vec![] }); + if ENCODINGS.all.is_empty() { + let scratch = scratch_arena(None); + let mut preferred = Vec::new_in(&*scratch); + let mut alternative = Vec::new_in(&*scratch); + + // These encodings are always available. + preferred.push(Encoding { label: "UTF-8", canonical: "UTF-8" }); + preferred.push(Encoding { label: "UTF-8 BOM", canonical: "UTF-8 BOM" }); if let Ok(f) = init_if_needed() { let mut n = 0; loop { - let c_name = (f.ucnv_getAvailableName)(n); - if c_name.is_null() { + let name = (f.ucnv_getAvailableName)(n); + if name.is_null() { break; } - let name = CStr::from_ptr(c_name).to_str().unwrap_unchecked(); - // We have already pushed UTF-8 above. - // There is no need to filter UTF-8 BOM here, since ICU does not distinguish it from UTF-8. - if name != "UTF-8" { - ENCODINGS.push(EncodingInfo { name, aliases: get_aliases(c_name, f) }) + n += 1; + + let name = CStr::from_ptr(name).to_str().unwrap_unchecked(); + // We have already pushed UTF-8 above and can skip it. + // There is no need to filter UTF-8 BOM here, + // since ICU does not distinguish it from UTF-8. + if name.is_empty() || name == "UTF-8" { + continue; } - n += 1; + let mut status = icu_ffi::U_ZERO_ERROR; + let mime = (f.ucnv_getStandardName)( + name.as_ptr(), + c"MIME".as_ptr() as *const _, + &mut status, + ); + if !mime.is_null() && status.is_success() { + let mime = CStr::from_ptr(mime).to_str().unwrap_unchecked(); + preferred.push(Encoding { label: mime, canonical: name }); + } else { + alternative.push(Encoding { label: name, canonical: name }); + } } } - } - &ENCODINGS - } -} + let preferred_len = preferred.len(); -fn get_aliases(name: *const c_char, f: &LibraryFunctions) -> Vec<&'static str> { - let mut status = icu_ffi::U_ZERO_ERROR; - let alias_count = unsafe { (f.ucnv_countAliases)(name, &mut status) }; - if status.is_failure() { - return vec![]; - } + // Combine the preferred and alternative encodings into a single list. + let mut all = Vec::with_capacity(preferred.len() + alternative.len()); + all.extend(preferred); + all.extend(alternative); - let mut aliases: Vec<*mut c_char> = vec![null_mut(); alias_count as usize]; + let all = all.leak(); + ENCODINGS.preferred = &all[..preferred_len]; + ENCODINGS.all = &all[..]; + } - status = icu_ffi::U_ZERO_ERROR; - unsafe { (f.ucnv_getAliases)(name, aliases.as_mut_ptr(), &mut status) }; - if status.is_failure() { - return vec![]; + &ENCODINGS } - - aliases - .iter() - .skip(1) // The first alias is the name itself. - .map(|alias| unsafe { CStr::from_ptr(*alias).to_str().unwrap_unchecked() }) - .collect() } /// Formats the given ICU error code into a human-readable string. @@ -855,6 +869,15 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { result } +// NOTE: +// To keep this neat, fields are ordered by prefix (= `ucol_` before `uregex_`), +// followed by functions in this order: +// * Static methods (e.g. `ucnv_getAvailableName`) +// * Constructors (e.g. `ucnv_open`) +// * Destructors (e.g. `ucnv_close`) +// * Methods, grouped by relationship +// (e.g. `uregex_start64` and `uregex_end64` are near each other) +// // WARNING: // The order of the fields MUST match the order of strings in the following two arrays. #[allow(non_snake_case)] @@ -862,18 +885,19 @@ pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> { struct LibraryFunctions { // LIBICUUC_PROC_NAMES u_errorName: icu_ffi::u_errorName, + ucasemap_open: icu_ffi::ucasemap_open, + ucasemap_utf8FoldCase: icu_ffi::ucasemap_utf8FoldCase, ucnv_getAvailableName: icu_ffi::ucnv_getAvailableName, - ucnv_countAliases: icu_ffi::ucnv_countAliases, - ucnv_getAliases: icu_ffi::ucnv_getAliases, + ucnv_getStandardName: icu_ffi::ucnv_getStandardName, ucnv_open: icu_ffi::ucnv_open, ucnv_close: icu_ffi::ucnv_close, ucnv_convertEx: icu_ffi::ucnv_convertEx, - ucasemap_open: icu_ffi::ucasemap_open, - ucasemap_utf8FoldCase: icu_ffi::ucasemap_utf8FoldCase, utext_setup: icu_ffi::utext_setup, utext_close: icu_ffi::utext_close, // LIBICUI18N_PROC_NAMES + ucol_open: icu_ffi::ucol_open, + ucol_strcollUTF8: icu_ffi::ucol_strcollUTF8, uregex_open: icu_ffi::uregex_open, uregex_close: icu_ffi::uregex_close, uregex_setTimeLimit: icu_ffi::uregex_setTimeLimit, @@ -882,27 +906,26 @@ struct LibraryFunctions { uregex_findNext: icu_ffi::uregex_findNext, uregex_start64: icu_ffi::uregex_start64, uregex_end64: icu_ffi::uregex_end64, - ucol_open: icu_ffi::ucol_open, - ucol_strcollUTF8: icu_ffi::ucol_strcollUTF8, } -const LIBICUUC_PROC_NAMES: [&CStr; 11] = [ - // Found in libicuuc.so on UNIX, icuuc.dll/icu.dll on Windows. +// Found in libicuuc.so on UNIX, icuuc.dll/icu.dll on Windows. +const LIBICUUC_PROC_NAMES: [&CStr; 10] = [ c"u_errorName", + c"ucasemap_open", + c"ucasemap_utf8FoldCase", c"ucnv_getAvailableName", - c"ucnv_countAliases", - c"ucnv_getAliases", + c"ucnv_getStandardName", c"ucnv_open", c"ucnv_close", c"ucnv_convertEx", - c"ucasemap_open", - c"ucasemap_utf8FoldCase", c"utext_setup", c"utext_close", ]; +// Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows. const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [ - // Found in libicui18n.so on UNIX, icuin.dll/icu.dll on Windows. + c"ucol_open", + c"ucol_strcollUTF8", c"uregex_open", c"uregex_close", c"uregex_setTimeLimit", @@ -911,8 +934,6 @@ const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [ c"uregex_findNext", c"uregex_start64", c"uregex_end64", - c"ucol_open", - c"ucol_strcollUTF8", ]; enum LibraryFunctionsState { @@ -1052,16 +1073,13 @@ mod icu_ffi { pub struct UConverter; - pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *mut c_char; + pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *const c_char; - pub type ucnv_countAliases = - unsafe extern "C" fn(name: *const c_char, status: &mut UErrorCode) -> u16; - - pub type ucnv_getAliases = unsafe extern "C" fn( - name: *const c_char, - aliases: *mut *mut c_char, + pub type ucnv_getStandardName = unsafe extern "C" fn( + name: *const u8, + standard: *const u8, status: &mut UErrorCode, - ); + ) -> *const c_char; pub type ucnv_open = unsafe extern "C" fn(converter_name: *const u8, status: &mut UErrorCode) -> *mut UConverter; From ac1b5cf9d6d4e9237965cfefb9e044e541a3da99 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 5 Jun 2025 19:41:58 +0200 Subject: [PATCH 8/8] No results = Empty list --- src/bin/edit/draw_statusbar.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/bin/edit/draw_statusbar.rs b/src/bin/edit/draw_statusbar.rs index f7e04c87e16c..4dc08a74d75a 100644 --- a/src/bin/edit/draw_statusbar.rs +++ b/src/bin/edit/draw_statusbar.rs @@ -297,10 +297,6 @@ fn encoding_picker_update_list(state: &mut State) { } } - if matches.is_empty() { - return; - } - matches.sort_by(|a, b| b.0.cmp(&a.0)); state.encoding_picker_results = Some(Vec::from_iter(matches.iter().map(|(_, enc)| *enc))); }