From 3e62ae0b2111a262de786eabbc90d127820ae810 Mon Sep 17 00:00:00 2001 From: Ren Kararou Date: Wed, 29 Nov 2023 09:05:14 -0600 Subject: [PATCH] printf stuff --- usr/src/mei/printf/formats.7.man | 355 ++++++++++++++++ usr/src/mei/printf/printf.1.man | 692 +++++++++++++++++++++++++++++++ usr/src/mei/printf/printf.c | 643 ++++++++++++++++++++++++++++ usr/src/mei/printf/src/printf.rs | 148 ++++--- 4 files changed, 1772 insertions(+), 66 deletions(-) create mode 100644 usr/src/mei/printf/formats.7.man create mode 100644 usr/src/mei/printf/printf.1.man create mode 100644 usr/src/mei/printf/printf.c diff --git a/usr/src/mei/printf/formats.7.man b/usr/src/mei/printf/formats.7.man new file mode 100644 index 0000000..c10ccdf --- /dev/null +++ b/usr/src/mei/printf/formats.7.man @@ -0,0 +1,355 @@ +FORMATS(7) Standards, Environments, and Macros FORMATS(7) + +NNAAMMEE + formats - file format notation + +DDEESSCCRRIIPPTTIIOONN + Utility descriptions use a syntax to describe the data organization + within files—stdin, stdout, stderr, input files, and output files—when + that organization is not otherwise obvious. The syntax is similar to + that used by the pprriinnttff(3C) function. When used for stdin or input + file descriptions, this syntax describes the format that could have + been used to write the text to be read, not a format that could be used + by the ssccaannff(3C) function to read the input file. + + FFoorrmmaatt + The description of an individual record is as follows: + + "", [<_a_r_g_1>, <_a_r_g_2>, ..., <_a_r_g_n>] + + + + The ffoorrmmaatt is a character string that contains three types of objects + defined below: + + _c_h_a_r_a_c_t_e_r_s + Characters that are not _e_s_c_a_p_e _s_e_q_u_e_n_c_e_s + or _c_o_n_v_e_r_s_i_o_n _s_p_e_c_i_f_i_c_a_t_i_o_n_s, as + described below, are copied to the + output. + + + _e_s_c_a_p_e _s_e_q_u_e_n_c_e_s + Represent non-graphic characters. 
+ + + _c_o_n_v_e_r_s_i_o_n _s_p_e_c_i_f_i_c_a_t_i_o_n_s + Specifies the output format of each + argument. (See below.) + + + + The following characters have the following special meaning in the + format string: + + ```` '''' + (An empty character position.) One or more blank characters. + + + //\\ + Exactly one space character. + + + + The notation for spaces allows some flexibility for application output. + Note that an empty character position in ffoorrmmaatt represents one or more + blank characters on the output (not _w_h_i_t_e _s_p_a_c_e, which can include + newline characters). Therefore, another utility that reads that output + as its input must be prepared to parse the data using ssccaannff(3C), + aawwkk(1), and so forth. The /\ character is used when exactly one space + character is output. + + EEssccaappee SSeeqquueenncceess + The following table lists escape sequences and associated actions on + display devices capable of the action. + + + + + SSeeqquueennccee CChhaarraacctteerr TTeerrmmiinnaall AAccttiioonn + ──────────────────────────────────────────────── + \\\\ backslash None. + \\aa alert Attempts to alert + the user through + audible or visible + notification. + \\bb backspace Moves the printing + position to one + column before the + current position, + unless the current + position is the + start of a line. + \\ff form-feed Moves the printing + position to the + initial printing + position of the + next logical page. + \\nn newline Moves the printing + position to the + start of the next + line. + \\rr carriage-return Moves the printing + position to the + start of the + current line. + \\tt tab Moves the printing + position to the + next tab position + on the current + line. If there are + no more tab + positions left on + the line, the + behavior is + undefined. + \\vv vertical-tab Moves the printing + position to the + start of the next + vertical tab + position.
If there + are no more + vertical tab + positions left on + the page, the + behavior is + undefined. + + + CCoonnvveerrssiioonn SSppeecciiffiiccaattiioonnss + Each conversion specification is introduced by the percent-sign + character (%). After the character %, the following appear in + sequence: + + _f_l_a_g_s + Zero or more _f_l_a_g_s, in any order, that modify + the meaning of the conversion specification. + + + _f_i_e_l_d _w_i_d_t_h + An optional string of decimal digits to + specify a minimum _f_i_e_l_d _w_i_d_t_h. For an output + field, if the converted value has fewer bytes + than the field width, it is padded on the + left (or right, if the left-adjustment flag + (−), described below, has been given to the + field width). + + + _p_r_e_c_i_s_i_o_n + Gives the minimum number of digits to appear + for the d, o, i, u, x or X conversions (the + field is padded with leading zeros), the + number of digits to appear after the radix + character for the e and f conversions, the + maximum number of significant digits for the + g conversion; or the maximum number of bytes + to be written from a string in s conversion. + The precision takes the form of a period (.) + followed by a decimal digit string; a null + digit string is treated as zero. + + + _c_o_n_v_e_r_s_i_o_n _c_h_a_r_a_c_t_e_r_s + A conversion character (see below) that + indicates the type of conversion to be + applied. + + + _f_l_a_g_s + The _f_l_a_g_s and their meanings are: + + _− + The result of the conversion is left-justified within the + field. + + + _+ + The result of a signed conversion always begins with a sign + (+ or −). + + + _<_s_p_a_c_e_> + If the first character of a signed conversion is not a + sign, a space character is prefixed to the result. This + means that if the space character and + flags both appear, + the space character flag is ignored. + + + _# + The value is to be converted to an alternative form. For c, + d, i, u, and s conversions, the behaviour is undefined. 
For + o conversion, it increases the precision to force the first + digit of the result to be a zero. For x or X conversion, a + non-zero result has 0x or 0X prefixed to it, respectively. + For e, E, f, g, and G conversions, the result always + contains a radix character, even if no digits follow the + radix character. For g and G conversions, trailing zeros + are not removed from the result as they usually are. + + + _0 + For d, i, o, u, x, X, e, E, f, g, and G conversions, + leading zeros (following any indication of sign or base) + are used to pad to the field width; no space padding is + performed. If the 0 and − flags both appear, the 0 flag is + ignored. For d, i, o, u, x and X conversions, if a + precision is specified, the 0 flag is ignored. For other + conversions, the behaviour is undefined. + + + CCoonnvveerrssiioonn CChhaarraacctteerrss + Each conversion character results in fetching zero or more arguments. + The results are undefined if there are insufficient arguments for the + format. If the format is exhausted while arguments remain, the excess + arguments are ignored. + + + The _c_o_n_v_e_r_s_i_o_n _c_h_a_r_a_c_t_e_r_s and their meanings are: + + _d_,_i_,_o_,_u_,_x_,_X + The integer argument is written as signed decimal (d or + i), unsigned octal (o), unsigned decimal (u), or + unsigned hexadecimal notation (x and X). The d and i + specifiers convert to signed decimal in the style + [[−]]_d_d_d_d. The x conversion uses the numbers and letters + 0123456789abcdef and the X conversion uses the numbers + and letters 0123456789ABCDEF. The _p_r_e_c_i_s_i_o_n component + of the argument specifies the minimum number of digits + to appear. If the value being converted can be + represented in fewer digits than the specified minimum, + it is expanded with leading zeros. The default + precision is 1. The result of converting a zero value + with a precision of 0 is no characters. 
If both the + field width and precision are omitted, the + implementation may precede, follow or precede and + follow numeric arguments of types d, i and u with blank + characters; arguments of type o (octal) may be preceded + with leading zeros. + + The treatment of integers and spaces is different from + the pprriinnttff(3C) function in that they can be surrounded + with blank characters. This was done so that, given a + format such as: + + "%d\n",<_f_o_o> + + the implementation could use a pprriinnttff(()) call such as: + + printf("%6d\n", _f_o_o); + + and still conform. This notation is thus somewhat like + ssccaannff(()) in addition to pprriinnttff(()).. + + + _f + The floating point number argument is written in + decimal notation in the style [[−]]_d_d_d._d_d_d, where the + number of digits after the radix character (shown here + as a decimal point) is equal to the _p_r_e_c_i_s_i_o_n + specification. The LLCC__NNUUMMEERRIICC locale category + determines the radix character to use in this format. + If the _p_r_e_c_i_s_i_o_n is omitted from the argument, six + digits are written after the radix character; if the + _p_r_e_c_i_s_i_o_n is explicitly 0, no radix character appears. + + + _e_,_E + The floating point number argument is written in the + style [[−]]_d._d_d_de±dddd (the symbol ± indicates either a + plus or minus sign), where there is one digit before + the radix character (shown here as a decimal point) and + the number of digits after it is equal to the + precision. The LLCC__NNUUMMEERRIICC locale category determines + the radix character to use in this format. When the + precision is missing, six digits are written after the + radix character; if the precision is 0, no radix + character appears. The E conversion character produces + a number with E instead of e introducing the exponent. + The exponent always contains at least two digits. 
+ However, if the value to be written requires an + exponent greater than two digits, additional exponent + digits are written as necessary. + + + _g_,_G + The floating point number argument is written in style + f or e (or in style E in the case of a G conversion + character), with the precision specifying the number of + significant digits. The style used depends on the value + converted: style g is used only if the exponent + resulting from the conversion is less than −4 or + greater than or equal to the precision. Trailing zeros + are removed from the result. A radix character appears + only if it is followed by a digit. + + + _c + The integer argument is converted to an uunnssiiggnneedd cchhaarr + and the resulting byte is written. + + + _s + The argument is taken to be a string and bytes from the + string are written until the end of the string or the + number of bytes indicated by the _p_r_e_c_i_s_i_o_n + specification of the argument is reached. If the + precision is omitted from the argument, it is taken to + be infinite, so all bytes up to the end of the string + are written. + + + _% + Write a % character; no argument is converted. + + + + In no case does a non-existent or insufficient _f_i_e_l_d _w_i_d_t_h cause + truncation of a field; if the result of a conversion is wider than the + field width, the field is simply expanded to contain the conversion + result. The term _f_i_e_l_d _w_i_d_t_h should not be confused with the term + _p_r_e_c_i_s_i_o_n used in the description of %s. + + + One difference from the C function pprriinnttff(()) is that the l and h + conversion characters are not used. There is no differentiation between + decimal values for type iinntt, type lloonngg, or type sshhoorrtt. The + specifications %d or %i should be interpreted as an arbitrary length + sequence of digits. Also, no distinction is made between single + precision and double precision numbers (ffllooaatt or ddoouubbllee in C). 
These + are simply referred to as floating point numbers. + + + Many of the output descriptions use the term lliinnee, such as: + + "%s", <_i_n_p_u_t _l_i_n_e> + + + + Since the definition of lliinnee includes the trailing newline character + already, there is no need to include a \\nn in the format; a double + newline character would otherwise result. + +EEXXAAMMPPLLEESS + EExxaammppllee 11 To represent the output of a program that prints a date and + time in the form Sunday, July 3, 10:02, where _<_w_e_e_k_d_a_y_> and _<_m_o_n_t_h_> are + strings: + + "%s,/\%s/\%d,/\%d:%.2d\n",<_w_e_e_k_d_a_y>,<_m_o_n_t_h>,<_d_a_y>,<_h_o_u_r>,<_m_i_n> + + + EExxaammppllee 22 To show pi written to 5 decimal places: + + "pi/\=/\%.5f\n",<_v_a_l_u_e _o_f _p_i> + + + EExxaammppllee 33 To show an input file format consisting of five colon- + separated fields: + + "%s:%s:%s:%s:%s\n",<_a_r_g_1>,<_a_r_g_2>,<_a_r_g_3>,<_a_r_g_4>,<_a_r_g_5> + + +SSEEEE AALLSSOO + aawwkk(1), pprriinnttff(1), pprriinnttff(3C), ssccaannff(3C) + + March 28, 1995 FORMATS(7) diff --git a/usr/src/mei/printf/printf.1.man b/usr/src/mei/printf/printf.1.man new file mode 100644 index 0000000..259e9a9 --- /dev/null +++ b/usr/src/mei/printf/printf.1.man @@ -0,0 +1,692 @@ +PRINTF(1) User Commands PRINTF(1) + +NNAAMMEE + printf - write formatted output + +SSYYNNOOPPSSIISS + //uussrr//bbiinn//pprriinnttff + pprriinnttff _f_o_r_m_a_t [_a_r_g_u_m_e_n_t]... + + + kksshh9933 + pprriinnttff _f_o_r_m_a_t [_s_t_r_i_n_g...] + + +DDEESSCCRRIIPPTTIIOONN + //uussrr//bbiinn//pprriinnttff + The pprriinnttff utility writes each string operand to standard output using + _f_o_r_m_a_t to control the output format. + +OOPPEERRAANNDDSS + //uussrr//bbiinn//pprriinnttff + The following operands are supported by //uussrr//bbiinn//pprriinnttff: + + _f_o_r_m_a_t + A string describing the format to use to write the + remaining operands. 
The _f_o_r_m_a_t operand is used as the + _f_o_r_m_a_t string described on the ffoorrmmaattss(7) manual page, with + the following exceptions: + + o A SSPPAACCEE character in the format string, in any + context other than a flag of a conversion + specification, is treated as an ordinary + character that is copied to the output. + + o A /\ character in the format string is treated as a + /\ character, not as a SSPPAACCEE character. + + o In addition to the escape sequences described on + the ffoorrmmaattss(7) manual page (\\\\, \\aa, \\bb, \\ff, \\nn, + \\rr, \\tt, \\vv), \\_d_d_d, where _d_d_d is a one-, two- or + three-digit octal number, is written as a byte + with the numeric value specified by the octal + number. + + o The program does not precede or follow output + from the dd or uu conversion specifications with + blank characters not specified by the _f_o_r_m_a_t + operand. + + o The program does not precede output from the oo + conversion specification with zeros not + specified by the _f_o_r_m_a_t operand. + + o The argument used for the conversion character + (or width or precision parameters, see below) + may be taken from the _nnth argument instead of + the next unused argument, by specifying _n$$ + immediately following the %% character, or the ** + character (for width or precision arguments). + If _n$$ appears in any conversions in the format + string, then it must be used for all + conversions, including any variable width or + precision specifiers. + + o The special character ** may be used instead of a + string of decimal digits to indicate a minimum + field width or a precision. In this case the + next available argument is used (or the _nth if + the form _n$$ is used), treating its value as a + decimal string. + + o An additional conversion character, bb, is + supported as follows. The argument is taken to + be a string that can contain backslash-escape + sequences.
The following backslash-escape + sequences are supported: + + o the escape sequences listed on the + ffoorrmmaattss(7) manual page (\\\\, \\aa, \\bb, \\ff, \\nn, + \\rr, \\tt, \\vv), which are converted to the + characters they represent + + o \\00_d_d_d, where _d_d_d is a zero-, one-, two- or + three-digit octal number that is converted + to a byte with the numeric value specified + by the octal number + + o \\cc, which is written and causes pprriinnttff to + ignore any remaining characters in the + string operand containing it, any remaining + string operands and any additional + characters in the _f_o_r_m_a_t operand. + The interpretation of a backslash followed by any other + sequence of characters is unspecified. + + Bytes from the converted string are written until the end + of the string or the number of bytes indicated by the + precision specification is reached. If the precision is + omitted, it is taken to be infinite, so all bytes up to the + end of the converted string are written. For each + specification that consumes an argument, the next argument + operand is evaluated and converted to the appropriate type + for the conversion as specified below. The _f_o_r_m_a_t operand + is reused as often as necessary to satisfy the argument + operands. Any extra cc or ss conversion specifications are + evaluated as if a null string argument were supplied; other + extra conversion specifications are evaluated as if a zero + argument were supplied. + + When there are more argument operands than format + specifiers, and the format includes _n$$ position indicators, + then the format is reprocessed from the beginning as above, + but with the argument list starting from the next argument + after the highest _nth argument previously encountered. + + If the _f_o_r_m_a_t operand contains no conversion specifications + and _a_r_g_u_m_e_n_t operands are present, the results are + unspecified. 
If a character sequence in the _f_o_r_m_a_t operand + begins with a %% character, but does not form a valid + conversion specification, the behavior is unspecified. + + + _a_r_g_u_m_e_n_t + The strings to be written to standard output, under the + control of ffoorrmmaatt. The _a_r_g_u_m_e_n_t operands are treated as + strings if the corresponding conversion character is bb, cc + or ss. Otherwise, it is evaluated as a C constant, as + described by the ISO C standard, with the following + extensions: + + o A leading plus or minus sign is allowed. + + o If the leading character is a single- or double- + quote, the value is the numeric value in the + underlying codeset of the character following + the single- or double-quote. + If an argument operand cannot be completely converted into + an internal value appropriate to the corresponding + conversion specification, a diagnostic message is written + to standard error and the utility does not exit with a zero + exit status, but continues processing any remaining + operands and writes the value accumulated at the time the + error was detected to standard output. + + + kksshh9933 + The _f_o_r_m_a_t operands support the full range of ANSI C/C99/XPG6 + formatting specifiers as well as additional specifiers: + + %%bb + Each character in the string operand is processed specially, as + follows: + + \\aa + Alert character. + + + \\bb + Backspace character. + + + \\cc + Terminate output without appending NEWLINE. The remaining + string operands are ignored. + + + \\EE + Escape character (AASSCCIIII octal 003333). + + + \\ff + FORM FEED character. + + + \\nn + NEWLINE character. + + + \\tt + TAB character. + + + \\vv + Vertical tab character. + + + \\\\ + Backslash character. + + + \\00_x + The 8-bit character whose AASSCCIIII code is the 11-, 22-, or + 33-digit octal number _x. + + + + %%BB + Treat the argument as a variable name and output the value + without converting it to a string. 
This is most useful for + variables of type --bb. + + + %%HH + Output string with characters <<, &&, >>, "", and non-printable + characters, properly escaped for use in HTML and XML documents. + + + %%PP + Treat _s_t_r_i_n_g as an extended regular expression and convert it to + a shell pattern. + + + %%qq + Output _s_t_r_i_n_g quoted in a manner that it can be read in by the + shell to get back the same string. However, empty strings + resulting from missing string operands are not quoted. + + + %%RR + Treat _s_t_r_i_n_g as a shell pattern expression and convert it to an + extended regular expression. + + + %%TT + Treat _s_t_r_i_n_g as a date/time string and format it. The TT can be + preceded by (_d_f_o_r_m_a_t), where _d_f_o_r_m_a_t is a date format as defined + by the ddaattee(1) command. + + + %%ZZ + Output a byte whose value is 00. + + + + When performing conversions of _s_t_r_i_n_g to satisfy a numeric format + specifier, if the first character of _s_t_r_i_n_g is "" or '', the value is the + numeric value in the underlying code set of the character following the + "" or ''. Otherwise, _s_t_r_i_n_g is treated like a shell arithmetic expression + and evaluated. + + + If a _s_t_r_i_n_g operand cannot be completely converted into a value + appropriate for that format specifier, an error occurs, but remaining + _s_t_r_i_n_g operands continue to be processed. + + + In addition to the format specifier extensions, the following + extensions of ANSI C/C99/XPG6 are permitted in format specifiers: + + o The escape sequences \\EE and \\ee expand to the escape + character which is octal 033 in ASCII. + + o The escape sequence \\ccxx expands to CTRL-x. + + o The escape sequence \\CC[[.._n_a_m_e..]] expands to the collating + element _n_a_m_e. + + o The escape sequence \\xx{{hheexx}} expands to the character + corresponding to the hexadecimal value hheexx. + + o The format modifier flag = can be used to center a field to + a specified width.
When the output is a terminal, the + character width is used rather than the number of bytes. + + o Each of the integral format specifiers can have a third + modifier after width and precision that specifies the base + of the conversion from 2 to 64. In this case, the ## modifier + causes _b_a_s_e## to be prepended to the value. + + o The ## modifier can be used with the dd specifier when no base + is specified to cause the output to be written in units of + 1000 with a suffix of one of kk MM GG TT PP EE. + + o The ## modifier can be used with the ii specifier to cause the + output to be written in units of 11002244 with a suffix of one + of KKii MMii GGii TTii PPii EEii. + + + If there are more _s_t_r_i_n_g operands than format specifiers, the format + string is reprocessed from the beginning. If there are fewer _s_t_r_i_n_g + operands than format specifiers, then _s_t_r_i_n_g specifiers are treated as + if empty strings were supplied, numeric conversions are treated as if 00 + was supplied, and time conversions are treated as if nnooww was supplied. + + + When there are more argument operands than format specifiers, and the + format includes _n$$ position indicators, then the format is reprocessed + from the beginning as above, but with the argument list starting from + the next argument after the highest _nth argument previously + encountered. + + + //uussrr//bbiinn//pprriinnttff is equivalent to kksshh9933's pprriinnttff built-in and pprriinntt --ff, + which allows additional options to be specified. + +UUSSAAGGEE + //uussrr//bbiinn//pprriinnttff + The pprriinnttff utility, like the pprriinnttff(3C) function on which it is based, + makes no special provision for dealing with multi-byte characters when + using the %%cc conversion specification. Applications should be extremely + cautious using either of these features when there are multi-byte + characters in the character set. 
+ + + The %%bb conversion specification is not part of the ISO C standard; it + has been added here as a portable way to process backslash escapes + expanded in string operands as provided by the eecchhoo utility. See also + the USAGE section of the eecchhoo(1) manual page for ways to use pprriinnttff as + a replacement for all of the traditional versions of the eecchhoo utility. + + + If an argument cannot be parsed correctly for the corresponding + conversion specification, the pprriinnttff utility reports an error. Thus, + overflow and extraneous characters at the end of an argument being used + for a numeric conversion are to be reported as errors. + + + It is not considered an error if an argument operand is not completely + used for a cc or ss conversion or if a string operand's first or second + character is used to get the numeric value of a character. + +EEXXAAMMPPLLEESS + //uussrr//bbiinn//pprriinnttff + EExxaammppllee 11 Printing a Series of Prompts + + + The following example alerts the user, then prints and reads a series + of prompts: + + + example% pprriinnttff ""\\aaPPlleeaassee ffiillll iinn tthhee ffoolllloowwiinngg:: \\nnNNaammee:: "" + rreeaadd nnaammee + pprriinnttff ""PPhhoonnee nnuummbbeerr:: "" + rreeaadd pphhoonnee + + + + EExxaammppllee 22 Printing a Table of Calculations + + + The following example prints a table of calculations. It reads out a + list of right and wrong answers from a file, calculates the percentage + correctly, and prints them out. The numbers are right-justified and + separated by a single tab character. 
The percentage is written to one + decimal place of accuracy: + + + example% wwhhiillee rreeaadd rriigghhtt wwrroonngg ;; ddoo + ppeerrcceenntt==$$((eecchhoo ""ssccaallee==11;;(($$rriigghhtt**110000))//(($$rriigghhtt++$$wwrroonngg))"" || bbcc)) + pprriinnttff ""%%22dd rriigghhtt\\tt%%22dd wwrroonngg\\tt((%%ss%%%%))\\nn"" \\ + $$rriigghhtt $$wwrroonngg $$ppeerrcceenntt + ddoonnee << ddaattaabbaassee__ffiillee + + + + EExxaammppllee 33 Printing number strings + + + The command: + + + example% pprriinnttff ""%%55dd%%44dd\\nn"" 11 2211 332211 44332211 5544332211 + + + + + produces: + + + 1 21 + 3214321 + 54321 0 + + + + + The _f_o_r_m_a_t operand is used three times to print all of the given + strings and that a 00 was supplied by pprriinnttff to satisfy the last %%44dd + conversion specification. + + + EExxaammppllee 44 Tabulating Conversion Errors + + + The following example tabulates conversion errors. + + + + The pprriinnttff utility tells the user when conversion errors are detected + while producing numeric output. These results would be expected on an + implementation with 32-bit twos-complement integers when %%dd is + specified as the _f_o_r_m_a_t operand: + + + + + + ┌───────────────────────────────────────────────────────────────────┐ + │ Arguments Standard Diagnostic │ + │5a 5 printf: 5a not completely converted │ + │9999999999 2147483647 printf: 9999999999: Results too large │ + │-9999999999 -2147483648 printf: -9999999999: Results too large │ + │ABC 0 printf: ABC expected numeric value │ + └───────────────────────────────────────────────────────────────────┘ + + + The value shown on standard output is what would be expected as the + return value from the function ssttrrttooll(3C). A similar correspondence + exists between %%uu and ssttrrttoouull(3C), and %%ee, %%ff and %%gg and ssttrrttoodd(3C). + + + EExxaammppllee 55 Printing Output for a Specific Locale + + + The following example prints output for a specific locale. 
In a locale + using the ISO/IEC 646:1991 standard as the underlying codeset, the + command: + + + example% pprriinnttff ""%%dd\\nn"" 33 ++33 --33 \\''33 \\""++33 ""''--33"" + + + + + produces: + + + + + + ┌──────────────────────────────────┐ + │33 Numeric value of constant 3 │ + │33 Numeric value of constant 3 │ + │−−33 Numeric value of constant −3 │ + │5511 Numeric value of the │ + │ character `3' in the ISO/IEC │ + │ 646:1991 standard codeset │ + │4433 Numeric value of the │ + │ character `+' in the ISO/IEC │ + │ 646:1991 standard codeset │ + │4455 Numeric value of the │ + │ character `−' in the ISO/IEC │ + │ 646:1991 standard codeset │ + └──────────────────────────────────┘ + + + In a locale with multi-byte characters, the value of a character is + intended to be the value of the equivalent of the wwcchhaarr__tt + representation of the character. + + + + If an argument operand cannot be completely converted into an internal + value appropriate to the corresponding conversion specification, a + diagnostic message is written to standard error and the utility does + not exit with a zero exit status, but continues processing any remaining + operands and writes the value accumulated at the time the error was + detected to standard output. + + + EExxaammppllee 66 Alternative floating point representation 1 + + + The pprriinnttff utility supports an alternative floating point + representation (see pprriinnttff(3C) entry for the "%%aa"/"%%AA"), which allows + the output of floating-point values in a format that avoids the usual + base16 to base10 rounding errors. + + + example% printf "%a\n" 2 3.1 NaN + + + + + produces: + + + 0x1.0000000000000000000000000000p+01 + 0x1.8ccccccccccccccccccccccccccdp+01 + nan + + + + EExxaammppllee 77 Alternative floating point representation 2 + + + The following example shows two different representations of the same + floating-point value.
+ + + example% x=2 ; printf "%f == %a\n" x x + + + + + produces: + + + 2.000000 == 0x1.0000000000000000000000000000p+01 + + + + EExxaammppllee 88 Output of unicode values + + + The following command will print the EURO unicode symbol (code-point + 0x20ac). + + + example% LC_ALL=en_US.UTF-8 printf "\u[20ac]\n" + + + + + produces: + + + + <euro> + + + + where "<euro>" represents the EURO currency symbol character. + + + EExxaammppllee 99 Convert unicode character to unicode code-point value + + + The following command will print the hexadecimal value of a given + character. + + + example% export LC_ALL=en_US.UTF-8 + example% printf "%x\n" "'<euro>" + + + + + where "<euro>" represents the EURO currency symbol character (code- + point 0x20ac). + + + + produces: + + + 20ac + + + + EExxaammppllee 1100 Print the numeric value of an ASCII character + + example% printf "%d\n" "'A" + + + + + produces: + + + 65 + + + + EExxaammppllee 1111 Print the language-independent date and time format + + + To print the language-independent date and time format, the following + statement could be used: + + + example% printf "format" weekday month day hour min + + + + + For example, + + + $ printf format "Sunday" "July" 3 10 2 + + + + + For American usage, format could be the string: + + + "%s, %s %d, %d:%.2d\n" + + + + + producing the message: + + + Sunday, July 3, 10:02 + + + + + Whereas for EU usage, format could be the string: + + + "%1$s, %3$d. %2$s, %4$d:%5$.2d\n" + + + + + Note that the '$' characters must be properly escaped, such as + + + "%1\$s, %3\$d. %2\$s, %4\$d:%5\$.2d\n" in this case + + + + + producing the message: + + + Sunday, 3. July, 10:02 + + + +EENNVVIIRROONNMMEENNTT VVAARRIIAABBLLEESS + See eennvviirroonn(7) for descriptions of the following environment variables + that affect the execution of pprriinnttff: LLAANNGG, LLCC__AALLLL, LLCC__CCTTYYPPEE, + LLCC__MMEESSSSAAGGEESS, LLCC__NNUUMMEERRIICC, and NNLLSSPPAATTHH.
+ +EEXXIITT SSTTAATTUUSS + The following exit values are returned: + + 00 + Successful completion. + + + >>00 + An error occurred. + + +AATTTTRRIIBBUUTTEESS + See aattttrriibbuutteess(7) for descriptions of the following attributes: + + //uussrr//bbiinn//pprriinnttff + + + + ┌────────────────────┬───────────────────┐ + │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │ + ├────────────────────┼───────────────────┤ + │CSI │ Enabled │ + ├────────────────────┼───────────────────┤ + │Interface Stability │ Committed │ + ├────────────────────┼───────────────────┤ + │Standard │ See ssttaannddaarrddss(7). │ + └────────────────────┴───────────────────┘ + + kksshh9933 + + + + ┌────────────────────┬─────────────────┐ + │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │ + ├────────────────────┼─────────────────┤ + │Interface Stability │ Uncommitted │ + └────────────────────┴─────────────────┘ + +SSEEEE AALLSSOO + aawwkk(1), bbcc(1), ddaattee(1), eecchhoo(1), kksshh9933(1), pprriinnttff(3C), ssttrrttoodd(3C), + ssttrrttooll(3C), ssttrrttoouull(3C), aattttrriibbuutteess(7), eennvviirroonn(7), ffoorrmmaattss(7), + ssttaannddaarrddss(7) + +NNOOTTEESS + Using format specifiers (characters following '%') which are not listed + in the pprriinnttff(3C) or this manual page will result in undefined + behavior. + + + Using escape sequences (the character following a backslash ('\')) + which are not listed in the pprriinnttff(3C) or this manual page will result + in undefined behavior. + + + Floating-point values follow C99, XPG6 and IEEE 754 standard behavior + and can handle values the same way as the platform's |lloonngg ddoouubbllee| + datatype. + + + Floating-point values handle the sign separately which allows signs for + values like NaN (for example, -nan), Infinite (for example, -inf) and + zero (for example, -0.0). 
+ + May 11, 2014 PRINTF(1) diff --git a/usr/src/mei/printf/printf.c b/usr/src/mei/printf/printf.c new file mode 100644 index 0000000..c5948ff --- /dev/null +++ b/usr/src/mei/printf/printf.c @@ -0,0 +1,643 @@ +/* + * Copyright 2014 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define warnx1(a, b, c) warnx(a) +#define warnx2(a, b, c) warnx(a, b) +#define warnx3(a, b, c) warnx(a, b, c) + +#define PTRDIFF(x, y) ((uintptr_t)(x) - (uintptr_t)(y)) + +#define _(x) gettext(x) + +#define PF(f, func) do { \ + char *b = NULL; \ + if (havewidth) \ + if (haveprec) \ + (void) asprintf(&b, f, fieldwidth, precision, func); \ + else \ + (void) asprintf(&b, f, fieldwidth, func); \ + else if (haveprec) \ + (void) asprintf(&b, f, precision, func); \ + else \ + (void) asprintf(&b, f, func); \ + if (b) { \ + (void) fputs(b, stdout); \ + free(b); \ + } \ +_NOTE(CONSTCOND) } while (0) + +static int asciicode(void); +static char *doformat(char *, int *); +static int escape(char *, int, size_t *); +static int getchr(void); +static int getfloating(long double *, int); +static int getint(int *); +static int getnum(intmax_t *, uintmax_t *, int); +static const char + *getstr(void); +static char *mknum(char *, char); +static void usage(void); + +static const char digits[] = "0123456789"; + +static int myargc; +static char **myargv; +static char **gargv; +static char **maxargv; + +int +main(int argc, char *argv[]) +{ + size_t len; + int end, rval; + char *format, *fmt, *start; + + (void) setlocale(LC_ALL, ""); + + argv++; + argc--; + + /* + * POSIX says: Standard utilities that do not accept options, + * but that do accept operands, shall recognize "--" as a + * first argument to be discarded. + */ + if (argc && strcmp(argv[0], "--") == 0) { + argc--; + argv++; + } + + if (argc < 1) { + usage(); + return (1); + } + + /* + * Basic algorithm is to scan the format string for conversion + * specifications -- once one is found, find out if the field + * width or precision is a '*'; if it is, gather up value. 
Note, + * format strings are reused as necessary to use up the provided + * arguments, arguments of zero/null string are provided to use + * up the format string. + */ + fmt = format = *argv; + (void) escape(fmt, 1, &len); /* backslash interpretation */ + rval = end = 0; + gargv = ++argv; + + for (;;) { + maxargv = gargv; + + myargv = gargv; + for (myargc = 0; gargv[myargc]; myargc++) + /* nop */; + start = fmt; + while (fmt < format + len) { + if (fmt[0] == '%') { + (void) fwrite(start, 1, PTRDIFF(fmt, start), + stdout); + if (fmt[1] == '%') { + /* %% prints a % */ + (void) putchar('%'); + fmt += 2; + } else { + fmt = doformat(fmt, &rval); + if (fmt == NULL) + return (1); + end = 0; + } + start = fmt; + } else + fmt++; + if (gargv > maxargv) + maxargv = gargv; + } + gargv = maxargv; + + if (end == 1) { + warnx1(_("missing format character"), NULL, NULL); + return (1); + } + (void) fwrite(start, 1, PTRDIFF(fmt, start), stdout); + if (!*gargv) + return (rval); + /* Restart at the beginning of the format string. 
*/ + fmt = format; + end = 1; + } + /* NOTREACHED */ +} + + +static char * +doformat(char *fmt, int *rval) +{ + static const char skip1[] = "#'-+ 0"; + int fieldwidth, haveprec, havewidth, mod_ldbl, precision; + char convch, nextch; + char *start; + char **fargv; + char *dptr; + int l; + + start = alloca(strlen(fmt) + 1); + + dptr = start; + *dptr++ = '%'; + *dptr = 0; + + fmt++; + + /* look for "n$" field index specifier */ + l = strspn(fmt, digits); + if ((l > 0) && (fmt[l] == '$')) { + int idx = atoi(fmt); + if (idx <= myargc) { + gargv = &myargv[idx - 1]; + } else { + gargv = &myargv[myargc]; + } + if (gargv > maxargv) { + maxargv = gargv; + } + fmt += l + 1; + + /* save format argument */ + fargv = gargv; + } else { + fargv = NULL; + } + + /* skip to field width */ + while (*fmt && strchr(skip1, *fmt) != NULL) { + *dptr++ = *fmt++; + *dptr = 0; + } + + + if (*fmt == '*') { + + fmt++; + l = strspn(fmt, digits); + if ((l > 0) && (fmt[l] == '$')) { + int idx = atoi(fmt); + if (fargv == NULL) { + warnx1(_("incomplete use of n$"), NULL, NULL); + return (NULL); + } + if (idx <= myargc) { + gargv = &myargv[idx - 1]; + } else { + gargv = &myargv[myargc]; + } + fmt += l + 1; + } else if (fargv != NULL) { + warnx1(_("incomplete use of n$"), NULL, NULL); + return (NULL); + } + + if (getint(&fieldwidth)) + return (NULL); + if (gargv > maxargv) { + maxargv = gargv; + } + havewidth = 1; + + *dptr++ = '*'; + *dptr = 0; + } else { + havewidth = 0; + + /* skip to possible '.', get following precision */ + while (isdigit(*fmt)) { + *dptr++ = *fmt++; + *dptr = 0; + } + } + + if (*fmt == '.') { + /* precision present? 
*/ + fmt++; + *dptr++ = '.'; + + if (*fmt == '*') { + + fmt++; + l = strspn(fmt, digits); + if ((l > 0) && (fmt[l] == '$')) { + int idx = atoi(fmt); + if (fargv == NULL) { + warnx1(_("incomplete use of n$"), + NULL, NULL); + return (NULL); + } + if (idx <= myargc) { + gargv = &myargv[idx - 1]; + } else { + gargv = &myargv[myargc]; + } + fmt += l + 1; + } else if (fargv != NULL) { + warnx1(_("incomplete use of n$"), NULL, NULL); + return (NULL); + } + + if (getint(&precision)) + return (NULL); + if (gargv > maxargv) { + maxargv = gargv; + } + haveprec = 1; + *dptr++ = '*'; + *dptr = 0; + } else { + haveprec = 0; + + /* skip to conversion char */ + while (isdigit(*fmt)) { + *dptr++ = *fmt++; + *dptr = 0; + } + } + } else + haveprec = 0; + if (!*fmt) { + warnx1(_("missing format character"), NULL, NULL); + return (NULL); + } + *dptr++ = *fmt; + *dptr = 0; + + /* + * Look for a length modifier. POSIX doesn't have these, so + * we only support them for floating-point conversions, which + * are extensions. This is useful because the L modifier can + * be used to gain extra range and precision, while omitting + * it is more likely to produce consistent results on different + * architectures. 
This is not so important for integers + * because overflow is the only bad thing that can happen to + * them, but consider the command printf %a 1.1 + */ + if (*fmt == 'L') { + mod_ldbl = 1; + fmt++; + if (!strchr("aAeEfFgG", *fmt)) { + warnx2(_("bad modifier L for %%%c"), *fmt, NULL); + return (NULL); + } + } else { + mod_ldbl = 0; + } + + /* save the current arg offset, and set to the format arg */ + if (fargv != NULL) { + gargv = fargv; + } + + convch = *fmt; + nextch = *++fmt; + + *fmt = '\0'; + switch (convch) { + case 'b': { + size_t len; + char *p; + int getout; + + p = strdup(getstr()); + if (p == NULL) { + warnx2("%s", strerror(ENOMEM), NULL); + return (NULL); + } + getout = escape(p, 0, &len); + (void) fputs(p, stdout); + free(p); + + if (getout) + exit(*rval); + break; + } + case 'c': { + char p; + + p = getchr(); + PF(start, p); + break; + } + case 's': { + const char *p; + + p = getstr(); + PF(start, p); + break; + } + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { + char *f; + intmax_t val; + uintmax_t uval; + int signedconv; + + signedconv = (convch == 'd' || convch == 'i'); + if ((f = mknum(start, convch)) == NULL) + return (NULL); + if (getnum(&val, &uval, signedconv)) + *rval = 1; + if (signedconv) + PF(f, val); + else + PF(f, uval); + break; + } + case 'e': case 'E': + case 'f': case 'F': + case 'g': case 'G': + case 'a': case 'A': { + long double p; + + if (getfloating(&p, mod_ldbl)) + *rval = 1; + if (mod_ldbl) + PF(start, p); + else + PF(start, (double)p); + break; + } + default: + warnx2(_("illegal format character %c"), convch, NULL); + return (NULL); + } + *fmt = nextch; + + /* return the gargv to the next element */ + return (fmt); +} + +static char * +mknum(char *str, char ch) +{ + static char *copy; + static size_t copy_size; + char *newcopy; + size_t len, newlen; + + len = strlen(str) + 2; + if (len > copy_size) { + newlen = ((len + 1023) >> 10) << 10; + if ((newcopy = realloc(copy, newlen)) == NULL) { + warnx2("%s", 
strerror(ENOMEM), NULL); + return (NULL); + } + copy = newcopy; + copy_size = newlen; + } + + (void) memmove(copy, str, len - 3); + copy[len - 3] = 'j'; + copy[len - 2] = ch; + copy[len - 1] = '\0'; + return (copy); +} + +static int +escape(char *fmt, int percent, size_t *len) +{ + char *save, *store, c; + int value; + + for (save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store) { + if (c != '\\') { + *store = c; + continue; + } + switch (*++fmt) { + case '\0': /* EOS, user error */ + *store = '\\'; + *++store = '\0'; + *len = PTRDIFF(store, save); + return (0); + case '\\': /* backslash */ + case '\'': /* single quote */ + *store = *fmt; + break; + case 'a': /* bell/alert */ + *store = '\a'; + break; + case 'b': /* backspace */ + *store = '\b'; + break; + case 'c': + if (!percent) { + *store = '\0'; + *len = PTRDIFF(store, save); + return (1); + } + *store = 'c'; + break; + case 'f': /* form-feed */ + *store = '\f'; + break; + case 'n': /* newline */ + *store = '\n'; + break; + case 'r': /* carriage-return */ + *store = '\r'; + break; + case 't': /* horizontal tab */ + *store = '\t'; + break; + case 'v': /* vertical tab */ + *store = '\v'; + break; + /* octal constant */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c = (!percent && *fmt == '0') ? 
4 : 3; + for (value = 0; + c-- && *fmt >= '0' && *fmt <= '7'; ++fmt) { + value <<= 3; + value += *fmt - '0'; + } + --fmt; + if (percent && value == '%') { + *store++ = '%'; + *store = '%'; + } else + *store = (char)value; + break; + default: + *store = *fmt; + break; + } + } + *store = '\0'; + *len = PTRDIFF(store, save); + return (0); +} + +static int +getchr(void) +{ + if (!*gargv) + return ('\0'); + return ((int)**gargv++); +} + +static const char * +getstr(void) +{ + if (!*gargv) + return (""); + return (*gargv++); +} + +static int +getint(int *ip) +{ + intmax_t val; + uintmax_t uval; + int rval; + + if (getnum(&val, &uval, 1)) + return (1); + rval = 0; + if (val < INT_MIN || val > INT_MAX) { + warnx3("%s: %s", *gargv, strerror(ERANGE)); + rval = 1; + } + *ip = (int)val; + return (rval); +} + +static int +getnum(intmax_t *ip, uintmax_t *uip, int signedconv) +{ + char *ep; + int rval; + + if (!*gargv) { + *ip = *uip = 0; + return (0); + } + if (**gargv == '"' || **gargv == '\'') { + if (signedconv) + *ip = asciicode(); + else + *uip = asciicode(); + return (0); + } + rval = 0; + errno = 0; + if (signedconv) + *ip = strtoimax(*gargv, &ep, 0); + else + *uip = strtoumax(*gargv, &ep, 0); + if (ep == *gargv) { + warnx2(_("%s: expected numeric value"), *gargv, NULL); + rval = 1; + } else if (*ep != '\0') { + warnx2(_("%s: not completely converted"), *gargv, NULL); + rval = 1; + } + if (errno == ERANGE) { + warnx3("%s: %s", *gargv, strerror(ERANGE)); + rval = 1; + } + ++gargv; + return (rval); +} + +static int +getfloating(long double *dp, int mod_ldbl) +{ + char *ep; + int rval; + + if (!*gargv) { + *dp = 0.0; + return (0); + } + if (**gargv == '"' || **gargv == '\'') { + *dp = asciicode(); + return (0); + } + rval = 0; + errno = 0; + if (mod_ldbl) + *dp = strtold(*gargv, &ep); + else + *dp = strtod(*gargv, &ep); + if (ep == *gargv) { + warnx2(_("%s: expected numeric value"), *gargv, NULL); + rval = 1; + } else if (*ep != '\0') { + warnx2(_("%s: not completely 
converted"), *gargv, NULL); + rval = 1; + } + if (errno == ERANGE) { + warnx3("%s: %s", *gargv, strerror(ERANGE)); + rval = 1; + } + ++gargv; + return (rval); +} + +static int +asciicode(void) +{ + int ch; + + ch = **gargv; + if (ch == '\'' || ch == '"') + ch = (*gargv)[1]; + ++gargv; + return (ch); +} + +static void +usage(void) +{ + (void) fprintf(stderr, _("usage: printf format [arguments ...]\n")); +} diff --git a/usr/src/mei/printf/src/printf.rs b/usr/src/mei/printf/src/printf.rs index b343e6d..b9c129c 100644 --- a/usr/src/mei/printf/src/printf.rs +++ b/usr/src/mei/printf/src/printf.rs @@ -54,62 +54,27 @@ fn usage() { fn escape(escstr: String) -> String { let escmap = vec![ ("\\\\", "\\"), - //("\\a", "\a"), - //("\\b", "\b"), - //("\\f", "\f"), + //("\\a", "\a"), // alert! (run bell) + //("\\b", "\b"), // backspace: moves the printing position back one unless the beginning of + // the line + //("\\f", "\f"), // form-feed: moves the printint position to the start of the next logical + // page ... not even sure what this means in modern practice, or how to + // implement it. My paper terminal doesn't have pages. ("\\n", "\n"), ("\\r", "\r"), ("\\t", "\t"), - //("\\v", "\v"), + //("\\v", "\v"), // vertical-tab: moves the printing position to the next vertical tab + // position. If no vtabs are left, behaviour is undefined. 
]; let mut im = escstr.to_owned(); for esc in escmap { im = str::replace(im.as_str(), esc.0, esc.1); } - // TODO: Handle octal esc + // TODO: Handle octal esc \nnn where nnn is a 1-, 2-, or 3-digit octal number im.clone() } -/* - * fn fmt(fmtstr: String, args: Vec) -> String { - * let mut formatted: String = "".to_string(); - * let mut data = args.into_iter(); - * let fmtiter = fmtstr.clone(); - * let mut fmtiter = fmtiter.chars().peekable(); - * for _idx in 1..fmtstr.len() { - * let c: Option = fmtiter.next(); - * match c { - * Some('%') => { - * let next: char = if let Some(v) = fmtiter.next() { - * v - * } else { - * formatted += String::from('%').as_str(); - * break; - * }; - * match next { - * 'd' => { - * if let Some(arg) = data.next() { - * if let Ok(i) = arg.parse::() { - * formatted += String::from(format!("{i}")).as_str(); - * } else { - * eprintln!("printf: trying to format non-int data as int"); - * } - * } else { - * eprintln!("printf: format argument not supplied"); - * } - * }, - * _ => formatted += {String::from('%') + String::from(next).as_str()}.as_str(), - * } - * }, - * Some(c) => formatted += String::from(c).as_str(), - * None => break, - * } - * } - * formatted.clone() - * } - */ - fn fmtint(s: &str, d: Option) -> String { if let Some(i) = d { format!("{i}") @@ -118,46 +83,94 @@ fn fmtint(s: &str, d: Option) -> String { } } +fn fmthex(s: &str, d: Option, upper: bool) -> String { + String::new() +} + +fn fmtstr(s: &str, d: Option) -> String { + if let Some(st) = d { + format!("{st}") + } else { + String::new() + } +} + fn chkfmt(chkstr: &str) -> bool { // TODO: check if the thing is correct true } -fn fmt(fmtstr: String, args: Vec) -> String { - let mut formatted: String = "".to_string(); +fn fmt(fmtstrng: String, args: Vec) -> Option { + let mut formatted: String = String::new(); let mut args = args.into_iter(); - let fmtb = fmtstr.as_bytes(); + let fmtb = fmtstrng.as_bytes(); let mut i = 0; while i < fmtb.len() { if fmtb[i] == b'%' { if !(i + 
1 >= fmtb.len()) { let mut xe = i + 1; // find end of format specifier and get its index - while xe < fmtb.len() { - match fmtb[xe] { - b'd' | b'i' | b'o' | b'u' | b'x' | b'X' | b'f' | b'e' | b'E' | b'g' - | b'G' | b'c' | b's' | b'%' => break, - _ => xe += 1, + if fmtb[i + 1] != b'%' { + while xe < fmtb.len() { + match fmtb[xe] { + b'd' | b'i' | b'o' | b'u' | b'x' | b'X' | b'f' | b'e' | b'E' | b'g' + | b'G' | b'c' | b's' => break, + _ => xe += 1, + } } + if !(chkfmt(&fmtstrng[i..xe])) { + eprintln!("printf: invalid format string"); + i += 1; + continue; + } + if xe >= fmtb.len() { + eprintln!("printf: unmatched format escape"); + return None; + } + match fmtb[xe] { + // signed dec + b'd' => formatted += fmtint(&fmtstrng[i..xe], args.next()).as_str(), + + // unsigned dec + // b'u' => formatted += fmtuint(&fmtstrng[i..xe], args.next()).as_str(), + + // unsigned hex + b'X' => formatted += fmthex(&fmtstrng[i..xe], args.next(), true).as_str(), + b'x' => formatted += fmthex(&fmtstrng[i..xe], args.next(), false).as_str(), + + // unsigned oct + // b'o' => formatted += fmtoct(&fmtstrng[i..xe], args.next()).as_str(), + + // floating point + // b'f' => formatted += fmtfloat(&fmtstrng[i..xe], args.next()).as_str(), + // b'e' => formatted += fmtfloatext(&fmtstrng[i..xe], args.next(), false).as_str(), + // b'E' => formatted += fmtfloatext(&fmtstrng[i..xe], args.next(), true).as_str(), + // b'g' => formatted += this sucks + // b'G' => formatted += this sucks more + + // unsigned char + // b'c' => formatted += fmtchar(&fmtstrng[i..xe], args.next()).as_str(), + + // string + b's' => formatted += fmtstr(&fmtstrng[i..xe], args.next()).as_str(), + + // at this point, this match should be impossible + _ => { + eprintln!("illegal format character {}", &fmtstrng[xe..xe + 1]); + return None; + }, + }; + } else { + formatted += &"%"; } - if !(chkfmt(&fmtstr[i..xe])) { - eprintln!("printf: invalid format string"); - i += 1; - continue; - } - match fmtb[xe] { - b'd' => formatted += 
fmtint(&fmtstr[i..xe], args.next()).as_str(), - // at this point, this match should be impossible - _ => (), - }; i = xe; } } else { - formatted += &fmtstr[i..i+1]; + formatted += &fmtstrng[i..i+1]; } i += 1; } - formatted.clone() + Some(formatted.clone()) } fn main() -> ExitCode { @@ -169,8 +182,11 @@ fn main() -> ExitCode { } if let Some(fmtstr) = args.format { - let fmtstr = fmt(escape(fmtstr), args.argument); - print!("{fmtstr}"); + if let Some(fmtstr) = fmt(escape(fmtstr), args.argument) { + print!("{fmtstr}"); + } else { + return ExitCode::FAILURE; + } } ExitCode::SUCCESS