Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions api/internal/model/msg.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,23 @@ func ParseMsg(data []byte) (Msg, error) {

subject := msg.Header.Get("Subject")

to := make([]string, 0)
addresses, err := mail.ParseAddressList(msg.Header.Get("To"))
originalMsg, err := mail.ReadMessage(bytes.NewReader(data))
if err != nil {
return Msg{}, err
}

to := make([]string, 0)
addresses, err := mail.ParseAddressList(originalMsg.Header.Get("To"))
if err != nil {
return Msg{}, fmt.Errorf("error parsing To header: %w", err)
}
for _, address := range addresses {
to = append(to, address.Address)
}

from, err := mail.ParseAddress(msg.Header.Get("From"))
if err != nil {
return Msg{}, err
return Msg{}, fmt.Errorf("error parsing From header: %w", err)
}
fromAddress := from.Address

Expand Down
48 changes: 47 additions & 1 deletion api/internal/model/msg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,53 @@ func TestParseMsg(t *testing.T) {
Type: Reply,
},
},
}
{
// Regression: From with an ISO-8859-2 RFC 2047 encoded-word combined with
// a To address whose local part contains '='. Previously PreprocessEmailData
// decoded address headers using a no-op charset reader, producing invalid
// UTF-8 that caused mail.ParseAddress to return "missing '@' or angle-addr".
name: "From with iso-8859-2 encoded display name and To with equals in local part",
data: "From: John =?iso-8859-2?q?D=F6e?= <john.doe@example.com>\r\nTo: Jane Doe <jane.doe+alias=example.net@example.com>\r\nSubject: Hello\r\nIn-Reply-To: <abc@example.com>\r\n\r\nBody",
want: Msg{
From: "john.doe@example.com",
// iso-8859-2 is unsupported; SafeDecodeAddressName strips the encoded
// word and returns only the plain-text prefix.
FromName: "John",
To: []string{"jane.doe+alias=example.net@example.com"},
Subject: "Hello",
Body: "Body",
Type: Reply,
},
},
{
// Regression: From with a UTF-8 QP RFC 2047 encoded display name combined
// with a To address whose local part contains '='.
name: "From with UTF-8 QP encoded display name and To with equals in local part",
data: "From: =?UTF-8?Q?John_D=C5=8De?= <john.doe@example.com>\r\nTo: Jane Doe <jane.doe+alias=example.net@example.com>\r\nSubject: Hi\r\n\r\nBody",
want: Msg{
From: "john.doe@example.com",
FromName: "John D\u014de",
To: []string{"jane.doe+alias=example.net@example.com"},
Subject: "Hi",
Body: "Body",
Type: Send,
},
}, {
// Regression: From display name uses an empty-charset RFC 2047 encoded
// word (=??q?...?=). PreprocessEmailData rewrites it to UTF-8, so
// mail.ParseAddress can decode it, and SafeDecodeAddressName returns
// the human-readable name.
name: "From with empty-charset encoded display name",
data: "From: =??q?Service_Support?= <support@example.com>\r\nTo: john.doe@example.com\r\nSubject: Hello\r\n\r\nBody",
want: Msg{
From: "support@example.com",
FromName: "Service Support",
To: []string{"john.doe@example.com"},
Subject: "Hello",
Body: "Body",
Type: Send,
},
}}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
90 changes: 53 additions & 37 deletions api/internal/utils/email.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"regexp"
"strings"
"time"
"unicode/utf8"

"github.com/ProtonMail/gopenpgp/v3/crypto"
"ivpn.net/email/api/internal/utils/gomail.v2"
Expand Down Expand Up @@ -151,54 +152,45 @@ func cryptoRandInt(max int) (int, error) {
return int(nBig.Int64()), nil
}

// PreprocessEmailData decodes RFC 2047 encoded headers to fix parsing issues
// with email addresses containing encoded display names
// emptyCharsetEncodedWordRe recognises RFC 2047 encoded words whose charset
// field is empty, such as =??q?text?= or =??B?text?=. Its single capture
// group holds the encoding letter, the '?' that follows it, and the encoded
// text, i.e. everything between the empty charset and the closing "?=".
var emptyCharsetEncodedWordRe = regexp.MustCompile(`=\?\?([bqBQ]\?[^?]*)\?=`)

// utf8EncodedWordTemplate is the replacement template applied to every match
// of emptyCharsetEncodedWordRe: it re-emits the captured encoding and text
// with UTF-8 filled in as the charset.
const utf8EncodedWordTemplate = "=?UTF-8?$1?="

// fixEmptyCharsetEncodedWords returns s with every empty-charset encoded word
// (=??encoding?content?=) rewritten to declare UTF-8 as its charset. Text that
// contains no such word, including well-formed encoded words, is returned
// unchanged.
//
// The rewrite exists because some mailers leave the charset blank while still
// encoding the content as UTF-8, and strict parsers such as the letters
// library then reject the message with "cannot lookup encoding".
func fixEmptyCharsetEncodedWords(s string) string {
	fixed := emptyCharsetEncodedWordRe.ReplaceAllString(s, utf8EncodedWordTemplate)
	return fixed
}

// PreprocessEmailData normalises raw email bytes so that the standard
// net/mail parser and the letters library can handle them reliably.
// It deliberately does NOT attempt to decode RFC 2047 encoded-words in
// address headers (From, To, Cc, …): Go's net/mail.ParseAddress already
// handles RFC 2047 display names natively, and running a second decode
// pass with a no-op charset reader produces invalid UTF-8 bytes that
// cause parsing to fail. Use SafeDecodeAddressName to decode display
// names after parsing.
//
// It does fix malformed encoded words with an empty charset field
// (=??q?…?=) by substituting UTF-8, so that strict third-party parsers
// such as letters do not abort with "cannot lookup encoding".
func PreprocessEmailData(data []byte) ([]byte, error) {
msg, err := mail.ReadMessage(bytes.NewReader(data))
if err != nil {
return data, nil // Return original data if it can't be parsed
}

decoder := mime.WordDecoder{
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
// Default charset handling
return input, nil
},
}

// Headers that commonly contain RFC 2047 encoded addresses
addressHeaders := []string{"From", "To", "Cc", "Bcc", "Reply-To", "Sender"}

var buf bytes.Buffer

// Write headers
// Write headers, fixing any malformed encoded words in each value.
for key := range msg.Header {
values := msg.Header[key]
for _, value := range values {
// Try to decode RFC 2047 encoded-words for address headers
needsDecoding := false
for _, addrHeader := range addressHeaders {
if strings.EqualFold(key, addrHeader) {
needsDecoding = true
break
}
}

if needsDecoding && strings.Contains(value, "=?") {
// Decode the RFC 2047 encoded display name
decoded, err := decoder.DecodeHeader(value)
if err == nil {
value = decoded
} else {
// If decoding fails (e.g., malformed base64), try to clean it up
// Extract just the email address part if possible
value = CleanupMalformedEncodedAddress(value)
}
}

for _, value := range msg.Header[key] {
buf.WriteString(key)
buf.WriteString(": ")
buf.WriteString(value)
buf.WriteString(fixEmptyCharsetEncodedWords(value))
buf.WriteString("\r\n")
}
}
Expand All @@ -215,6 +207,30 @@ func PreprocessEmailData(data []byte) ([]byte, error) {
return buf.Bytes(), nil
}

// encodedWordTokenRe matches any RFC 2047 encoded-word token, including ones
// with an empty charset field (e.g. =??q?…?=). It is compiled once at package
// scope so SafeDecodeAddressName does not rebuild it on every fallback.
var encodedWordTokenRe = regexp.MustCompile(`=\?[^?]*\?[bqBQ]\?[^?]*\?=`)

// SafeDecodeAddressName decodes an RFC 2047 encoded display name such as the
// Name field returned by net/mail.ParseAddress.
//
// Names that contain no "=?" marker are returned untouched. Otherwise the
// name is decoded with a mime.WordDecoder that has no CharsetReader, so only
// the charsets the mime package handles by itself can be decoded; any other
// charset (or an empty one) makes decoding fail. When decoding fails, or the
// decoded text is not valid UTF-8, the function falls back to deleting every
// encoded-word token and returning the remaining plain-text fragments with
// surrounding whitespace trimmed.
//
// The fallback is best-effort: if the name consisted solely of undecodable
// encoded words, the result is the empty string.
func SafeDecodeAddressName(name string) string {
	if !strings.Contains(name, "=?") {
		return name
	}

	var decoder mime.WordDecoder
	if decoded, err := decoder.DecodeHeader(name); err == nil && utf8.ValidString(decoded) {
		return decoded
	}

	// Fallback: drop the encoded-word tokens and keep only the readable text.
	return strings.TrimSpace(encodedWordTokenRe.ReplaceAllString(name, ""))
}

// CleanupMalformedEncodedAddress attempts to extract a valid email address
// from a malformed RFC 2047 encoded string
func CleanupMalformedEncodedAddress(addr string) string {
Expand Down
152 changes: 152 additions & 0 deletions api/internal/utils/email_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package utils

import (
"strings"
"testing"
)

Expand Down Expand Up @@ -132,3 +133,154 @@ func TestRemoveHtmlHeader(t *testing.T) {
})
}
}

// TestSafeDecodeAddressName checks both paths of SafeDecodeAddressName: the
// normal decode path for charsets the standard mime package understands, and
// the fallback path that strips encoded-word tokens it cannot decode.
func TestSafeDecodeAddressName(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "plain name without encoding",
			input:    "John Doe",
			expected: "John Doe",
		},
		{
			name:     "empty string",
			input:    "",
			expected: "",
		},
		{
			name:     "UTF-8 base64 encoded word",
			input:    "=?UTF-8?B?SmFuZSBEb2U=?=",
			expected: "Jane Doe",
		},
		{
			// =?UTF-8?Q?John_D=C5=8De?= encodes "John Dōe" (ō = U+014D)
			name:     "UTF-8 QP encoded word with multibyte char",
			input:    "=?UTF-8?Q?John_D=C5=8De?=",
			expected: "John D\u014de",
		},
		{
			// SafeDecodeAddressName uses a mime.WordDecoder without a
			// CharsetReader, so decoding an iso-8859-2 word fails. The function
			// must fall back to stripping the encoded word and returning only
			// the plain-text prefix.
			name:     "iso-8859-2 encoded word falls back to plain-text prefix",
			input:    "John =?iso-8859-2?q?D=F6e?=",
			expected: "John",
		},
		{
			// iso-8859-1 is handled by the mime package itself, so byte 0xF6
			// is converted to "ö" (U+00F6) instead of triggering the fallback.
			name:     "iso-8859-1 encoded word is converted to UTF-8",
			input:    "=?iso-8859-1?q?D=F6e?=",
			expected: "D\u00f6e",
		},
		{
			name:     "us-ascii QP encoded word is decoded normally",
			input:    "=?us-ascii?Q?John_Doe?=",
			expected: "John Doe",
		},
		{
			// =??q?...?= has an empty charset. SafeDecodeAddressName decoding
			// fails (empty charset lookup), so the fallback regex strips it.
			// In practice, PreprocessEmailData fixes this before it reaches here,
			// but SafeDecodeAddressName must be safe against it regardless.
			name:     "empty charset encoded word stripped by fallback",
			input:    "Service =??q?Support_Team?=",
			expected: "Service",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := SafeDecodeAddressName(tt.input)
			if result != tt.expected {
				t.Errorf("SafeDecodeAddressName(%q) = %q, want %q", tt.input, result, tt.expected)
			}
		})
	}
}

// TestFixEmptyCharsetEncodedWords verifies that fixEmptyCharsetEncodedWords
// rewrites only encoded words with an empty charset and leaves every other
// input byte-for-byte intact.
func TestFixEmptyCharsetEncodedWords(t *testing.T) {
	type testCase struct {
		desc string
		in   string
		want string
	}

	cases := []testCase{
		{
			desc: "empty charset QP encoded word is fixed",
			in:   "=??q?Hello_World?=",
			want: "=?UTF-8?q?Hello_World?=",
		},
		{
			desc: "empty charset base64 encoded word is fixed",
			in:   "=??B?SGVsbG8gV29ybGQ=?=",
			want: "=?UTF-8?B?SGVsbG8gV29ybGQ=?=",
		},
		{
			desc: "well-formed UTF-8 encoded word is unchanged",
			in:   "=?UTF-8?q?Hello_World?=",
			want: "=?UTF-8?q?Hello_World?=",
		},
		{
			desc: "well-formed iso-8859-1 encoded word is unchanged",
			in:   "=?iso-8859-1?q?Hello?=",
			want: "=?iso-8859-1?q?Hello?=",
		},
		{
			desc: "plain text without encoded words is unchanged",
			in:   "John Doe <john.doe@example.com>",
			want: "John Doe <john.doe@example.com>",
		},
		{
			// Regression: an empty-charset display name next to an address
			// whose local part contains '='; only the display name may change.
			desc: "empty charset encoded word in display name alongside address with equals",
			in:   "=??q?Service_Name?= <jane.doe+tag=example.net@example.com>",
			want: "=?UTF-8?q?Service_Name?= <jane.doe+tag=example.net@example.com>",
		},
		{
			desc: "multiple encoded words, one with empty charset",
			in:   "=??q?First?= =?UTF-8?q?Second?=",
			want: "=?UTF-8?q?First?= =?UTF-8?q?Second?=",
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			if got := fixEmptyCharsetEncodedWords(tc.in); got != tc.want {
				t.Errorf("fixEmptyCharsetEncodedWords(%q) = %q, want %q", tc.in, got, tc.want)
			}
		})
	}
}

// TestPreprocessEmailData_EmptyCharsetEncodedWord is a regression test for
// messages whose From or To header carries an RFC 2047 encoded word with an
// empty charset field (=??q?…?=), which makes letters.ParseEmail fail with
// "cannot lookup encoding". It asserts that PreprocessEmailData rewrites such
// words to =?UTF-8?q?…?= and leaves an address containing '=' untouched.
func TestPreprocessEmailData_EmptyCharsetEncodedWord(t *testing.T) {
	messageLines := []string{
		"From: =??q?Service_Support?= <support@example.com>",
		"To: John Doe <john.doe+tag=example.net@example.com>",
		"Subject: Hello",
		"",
		"Body text",
	}
	raw := strings.Join(messageLines, "\r\n")

	out, err := PreprocessEmailData([]byte(raw))
	if err != nil {
		t.Fatalf("PreprocessEmailData() error = %v", err)
	}

	got := string(out)
	has := func(fragment string) bool {
		return strings.Contains(got, fragment)
	}

	// The malformed token must be gone...
	if has("=??q?") {
		t.Errorf("processed data still contains empty-charset encoded word: %q", got)
	}
	// ...and replaced by its UTF-8 form.
	if !has("=?UTF-8?q?Service_Support?=") {
		t.Errorf("processed data missing fixed encoded word; got: %q", got)
	}
	// The To address with '=' in the local part must not be corrupted.
	if !has("john.doe+tag=example.net@example.com") {
		t.Errorf("processed data corrupted To address; got: %q", got)
	}
}
Loading