Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

echo: use uucore::format::parse_escape_only() #7316

Merged
merged 1 commit into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uu/echo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ path = "src/echo.rs"

[dependencies]
clap = { workspace = true }
uucore = { workspace = true }
uucore = { workspace = true, features = ["format"] }

[[bin]]
name = "echo"
Expand Down
241 changes: 6 additions & 235 deletions src/uu/echo/src/echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::env;
use std::ffi::{OsStr, OsString};
use std::io::{self, StdoutLock, Write};
use std::iter::Peekable;
use std::ops::ControlFlow;
use std::slice::Iter;
use uucore::error::{UResult, USimpleError};
use uucore::format::{parse_escape_only, EscapedChar, FormatChar};
use uucore::{format_usage, help_about, help_section, help_usage};

const ABOUT: &str = help_about!("echo.md");
Expand All @@ -25,236 +23,6 @@ mod options {
pub const DISABLE_BACKSLASH_ESCAPE: &str = "disable_backslash_escape";
}

/// The kind of numeric backslash escape sequence that is being parsed.
enum BackslashNumberType {
    /// `\NNN` whose first digit is `1` through `7`. Carries that first digit as an ASCII byte,
    /// because the caller has already looked at it.
    OctalStartingWithNonZero(u8),
    /// `\0NNN`: the digits to parse follow the leading `0`.
    OctalStartingWithZero,
    /// `\xHH`: the digits to parse follow the `x`.
    Hexadecimal,
}

impl BackslashNumberType {
    /// Returns the numeric base in which the digits of this escape sequence are interpreted.
    fn base(&self) -> Base {
        match *self {
            Self::Hexadecimal => Base::Hexadecimal,
            Self::OctalStartingWithNonZero(_) | Self::OctalStartingWithZero => Base::Octal,
        }
    }
}

/// Numeric base used by the digits of a backslash escape sequence.
enum Base {
    /// Base 8.
    Octal,
    /// Base 16.
    Hexadecimal,
}

impl Base {
    /// Converts an ASCII digit into the number it represents in this base.
    ///
    /// Hexadecimal digits are accepted in upper and lower case. Returns `None` when `digit` is
    /// not a valid digit of this base.
    fn ascii_to_number(&self, digit: u8) -> Option<u8> {
        // `char::to_digit` implements exactly the table this function needs, and tying it to
        // `radix()` keeps the accepted digit set and the radix from drifting apart.
        char::from(digit)
            .to_digit(u32::from(self.radix()))
            // Lossless: `to_digit` only yields values below the radix, which is at most 16.
            .map(|number| number as u8)
    }

    /// Returns the maximum number of digits that an escape sequence in this base may consume.
    fn maximum_number_of_digits(&self) -> u8 {
        match self {
            Self::Octal => 3,
            Self::Hexadecimal => 2,
        }
    }

    /// Returns the radix (8 or 16) of this base.
    fn radix(&self) -> u8 {
        match self {
            Self::Octal => 8,
            Self::Hexadecimal => 16,
        }
    }
}

/// Parse the numeric part of a `\xHH`, `\0NNN`, or `\NNN` escape sequence.
///
/// `input` must be positioned on the first digit to parse. Every digit that is successfully
/// parsed is consumed from `input`; the first byte that is not a valid digit is left in place.
/// At most three octal or two hexadecimal digits are consumed.
///
/// Returns `None`, without consuming anything, when there is no valid first digit: `input` is
/// exhausted or its next byte is not a digit of the relevant base. This cannot happen for
/// `BackslashNumberType::OctalStartingWithNonZero`, because the caller supplies a first digit
/// it has already seen.
fn parse_backslash_number(
    input: &mut Peekable<Iter<u8>>,
    backslash_number_type: BackslashNumberType,
) -> Option<u8> {
    let base = backslash_number_type.base();

    let leading_ascii = match backslash_number_type {
        // The caller has already peeked at the first digit and hands it over
        BackslashNumberType::OctalStartingWithNonZero(digit_ascii) => digit_ascii,
        // Argument may end right after "\0" or "\x", in which case there is nothing to parse.
        // The caller then prints NUL (0x00) for "\0" and a literal "\x" for "\x".
        BackslashNumberType::OctalStartingWithZero | BackslashNumberType::Hexadecimal => {
            **input.peek()?
        }
    };

    // Bail out before consuming anything if the first byte is not a digit of this base
    let mut value = base.ascii_to_number(leading_ascii)?;
    let _ = input.next();

    let radix = base.radix();
    let mut digits_left = base.maximum_number_of_digits() - 1;

    while digits_left > 0 {
        let next_digit = input
            .peek()
            .and_then(|&&ascii| base.ascii_to_number(ascii));

        match next_digit {
            None => break,
            Some(digit) => {
                // Only consume the byte once it is known to be a digit
                let _ = input.next();

                // Three octal digits span nine bits, which does not fit in a `u8`, so the
                // arithmetic wraps on purpose.
                //
                // GNU Core Utilities: "if nnn is a nine-bit value, the ninth bit is ignored"
                // https://www.gnu.org/software/coreutils/manual/html_node/echo-invocation.html
                value = value.wrapping_mul(radix).wrapping_add(digit);
            }
        }

        digits_left -= 1;
    }

    Some(value)
}

/// Writes `input` to `output`, interpreting backslash escape sequences on the way.
///
/// Bytes that are not part of an escape sequence are written unchanged. A backslash followed by
/// a byte that does not form a known escape is written literally together with that byte, and a
/// lone backslash at the very end of `input` is written as-is.
///
/// Returns `ControlFlow::Break(())` as soon as `\c` is encountered (nothing after it is
/// written), and `ControlFlow::Continue(())` once all of `input` has been processed.
///
/// # Errors
///
/// Propagates any I/O error reported while writing to `output`.
fn print_escaped(input: &[u8], output: &mut StdoutLock) -> io::Result<ControlFlow<()>> {
    let mut bytes = input.iter().peekable();

    while let Some(&byte) = bytes.next() {
        if byte != b'\\' {
            output.write_all(&[byte])?;
            continue;
        }

        // Look at the byte after the backslash without consuming it yet
        let escape = match bytes.peek() {
            Some(&&following) => following,
            None => {
                // Trailing backslash: printed literally
                output.write_all(br"\")?;
                continue;
            }
        };

        // The \NNN syntax for octal sequences. '0' is intentionally excluded, because the
        // \0NNN syntax is handled further down.
        if let b'1'..=b'7' = escape {
            // Unwrap because anything starting with "\1" through "\7" can be successfully parsed
            let value = parse_backslash_number(
                &mut bytes,
                BackslashNumberType::OctalStartingWithNonZero(escape),
            )
            .unwrap();

            output.write_all(&[value])?;
            continue;
        }

        // Every remaining case consumes the byte that follows the backslash
        let _ = bytes.next();

        match escape {
            b'c' => return Ok(ControlFlow::Break(())),
            b'x' => {
                match parse_backslash_number(&mut bytes, BackslashNumberType::Hexadecimal) {
                    Some(value) => output.write_all(&[value])?,
                    // "\x" without a hexadecimal digit after it is treated literally
                    None => output.write_all(br"\x")?,
                }
            }
            b'0' => {
                // "\0" without an octal digit after it is ASCII '\0' (NUL), 0x00
                let value = parse_backslash_number(
                    &mut bytes,
                    BackslashNumberType::OctalStartingWithZero,
                )
                .unwrap_or(b'\0');

                output.write_all(&[value])?;
            }
            b'\\' => output.write_all(br"\")?,
            b'a' => output.write_all(b"\x07")?,
            b'b' => output.write_all(b"\x08")?,
            b'e' => output.write_all(b"\x1B")?,
            b'f' => output.write_all(b"\x0C")?,
            b'n' => output.write_all(b"\n")?,
            b'r' => output.write_all(b"\r")?,
            b't' => output.write_all(b"\t")?,
            b'v' => output.write_all(b"\x0B")?,
            // Backslash and the following byte are treated literally
            other_byte => output.write_all(&[b'\\', other_byte])?,
        }
    }

    Ok(ControlFlow::Continue(()))
}

// A workaround because clap interprets the first '--' as a marker that a value
// follows. In order to use '--' as a value, we have to inject an additional '--'
fn handle_double_hyphens(args: impl uucore::Args) -> impl uucore::Args {
Expand Down Expand Up @@ -367,8 +135,11 @@ fn execute(
}

if escaped {
if print_escaped(bytes, stdout_lock)?.is_break() {
return Ok(());
for item in parse_escape_only(bytes) {
match item {
EscapedChar::End => return Ok(()),
c => c.write(&mut *stdout_lock)?,
};
}
} else {
stdout_lock.write_all(bytes)?;
Expand Down
Loading