vfscanf.c (18228B)
1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #define FLOATING_POINT 1 38 39 #if defined(LIBC_SCCS) && !defined(lint) 40 static char rcsid[] = "$OpenBSD: vfscanf.c,v 1.6 1998/01/20 21:25:39 millert Exp $"; 41 #endif /* LIBC_SCCS and not lint */ 42 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <ctype.h> 46 #ifdef __STDC__ 47 #include <stdarg.h> 48 #else 49 #include <varargs.h> 50 #endif 51 #include "local.h" 52 53 #define FLOATING_POINT 1 54 55 #ifdef FLOATING_POINT 56 #include "floatio.h" 57 #endif 58 59 #define BUF 513 /* Maximum length of numeric string. */ 60 61 /* 62 * Flags used during conversion. 63 */ 64 #define LONG 0x01 /* l: long or double */ 65 #define LONGDBL 0x02 /* L: long double; unimplemented */ 66 #define SHORT 0x04 /* h: short */ 67 #define QUAD 0x08 /* q: quad */ 68 #define SUPPRESS 0x10 /* suppress assignment */ 69 #define POINTER 0x20 /* weird %p pointer (`fake hex') */ 70 #define NOSKIP 0x40 /* do not skip blanks */ 71 72 /* 73 * The following are used in numeric conversions only: 74 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 75 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 76 */ 77 #define SIGNOK 0x080 /* +/- is (still) legal */ 78 #define NDIGITS 0x100 /* no digits detected */ 79 80 #define DPTOK 0x200 /* (float) decimal point is still legal */ 81 #define EXPOK 0x400 /* (float) exponent (e+3, etc) still legal */ 82 83 #define PFXOK 0x200 /* 0x prefix is (still) legal */ 84 #define NZDIGITS 0x400 /* no zero digits detected */ 85 86 /* 87 * Conversion types. 88 */ 89 #define CT_CHAR 0 /* %c conversion */ 90 #define CT_CCL 1 /* %[...] 
conversion */ 91 #define CT_STRING 2 /* %s conversion */ 92 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 93 #define CT_FLOAT 4 /* floating, i.e., strtod */ 94 95 #define u_char unsigned char 96 #define u_long unsigned long 97 98 static u_char *__sccl(); 99 100 /* 101 * vfscanf 102 */ 103 int 104 __svfscanf(fp, fmt0, ap) 105 register FILE *fp; 106 char const *fmt0; 107 _BSD_VA_LIST_ ap; 108 { 109 register u_char *fmt = (u_char *)fmt0; 110 register int c; /* character from format, or conversion */ 111 register size_t width; /* field width, or 0 */ 112 register char *p; /* points into all kinds of strings */ 113 register int n; /* handy integer */ 114 register int flags; /* flags as defined above */ 115 register char *p0; /* saves original value of p when necessary */ 116 int nassigned; /* number of fields assigned */ 117 int nread; /* number of characters consumed from fp */ 118 int base; /* base argument to strtoq/strtouq */ 119 u_quad_t (*ccfn)(); /* conversion function (strtoq/strtouq) */ 120 char ccltab[256]; /* character class table for %[...] */ 121 char buf[BUF]; /* buffer for numeric conversions */ 122 123 /* `basefix' is used to avoid `if' tests in the integer scanner */ 124 static short basefix[17] = 125 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 126 127 nassigned = 0; 128 nread = 0; 129 base = 0; /* XXX just to keep gcc happy */ 130 ccfn = NULL; /* XXX just to keep gcc happy */ 131 for (;;) { 132 c = *fmt++; 133 if (c == 0) 134 return (nassigned); 135 if (isspace(c)) { 136 while ((fp->_r > 0 || __srefill(fp) == 0) && 137 isspace(*fp->_p)) 138 nread++, fp->_r--, fp->_p++; 139 continue; 140 } 141 if (c != '%') 142 goto literal; 143 width = 0; 144 flags = 0; 145 /* 146 * switch on the format. continue if done; 147 * break once format type is derived. 
148 */ 149 again: c = *fmt++; 150 switch (c) { 151 case '%': 152 literal: 153 if (fp->_r <= 0 && __srefill(fp)) 154 goto input_failure; 155 if (*fp->_p != c) 156 goto match_failure; 157 fp->_r--, fp->_p++; 158 nread++; 159 continue; 160 161 case '*': 162 flags |= SUPPRESS; 163 goto again; 164 case 'L': 165 flags |= LONGDBL; 166 goto again; 167 case 'h': 168 flags |= SHORT; 169 goto again; 170 case 'l': 171 if (*fmt == 'l') { 172 fmt++; 173 flags |= QUAD; 174 } else { 175 flags |= LONG; 176 } 177 goto again; 178 case 'q': 179 flags |= QUAD; 180 goto again; 181 182 case '0': case '1': case '2': case '3': case '4': 183 case '5': case '6': case '7': case '8': case '9': 184 width = width * 10 + c - '0'; 185 goto again; 186 187 /* 188 * Conversions. 189 * Those marked `compat' are for 4.[123]BSD compatibility. 190 * 191 * (According to ANSI, E and X formats are supposed 192 * to the same as e and x. Sorry about that.) 193 */ 194 case 'D': /* compat */ 195 flags |= LONG; 196 /* FALLTHROUGH */ 197 case 'd': 198 c = CT_INT; 199 ccfn = (u_quad_t (*)())strtoq; 200 base = 10; 201 break; 202 203 case 'i': 204 c = CT_INT; 205 ccfn = (u_quad_t (*)())strtoq; 206 base = 0; 207 break; 208 209 case 'O': /* compat */ 210 flags |= LONG; 211 /* FALLTHROUGH */ 212 case 'o': 213 c = CT_INT; 214 ccfn = strtouq; 215 base = 8; 216 break; 217 218 case 'u': 219 c = CT_INT; 220 ccfn = strtouq; 221 base = 10; 222 break; 223 224 case 'X': 225 case 'x': 226 flags |= PFXOK; /* enable 0x prefixing */ 227 c = CT_INT; 228 ccfn = strtouq; 229 base = 16; 230 break; 231 232 #ifdef FLOATING_POINT 233 case 'E': 234 case 'G': 235 case 'e': 236 case 'f': 237 case 'g': 238 c = CT_FLOAT; 239 break; 240 #endif 241 242 case 's': 243 c = CT_STRING; 244 break; 245 246 case '[': 247 fmt = __sccl(ccltab, fmt); 248 flags |= NOSKIP; 249 c = CT_CCL; 250 break; 251 252 case 'c': 253 flags |= NOSKIP; 254 c = CT_CHAR; 255 break; 256 257 case 'p': /* pointer format is like hex */ 258 flags |= POINTER | PFXOK; 259 c = 
CT_INT; 260 ccfn = strtouq; 261 base = 16; 262 break; 263 264 case 'n': 265 if (flags & SUPPRESS) /* ??? */ 266 continue; 267 if (flags & SHORT) 268 *va_arg(ap, short *) = nread; 269 else if (flags & LONG) 270 *va_arg(ap, long *) = nread; 271 else 272 *va_arg(ap, int *) = nread; 273 continue; 274 275 /* 276 * Disgusting backwards compatibility hacks. XXX 277 */ 278 case '\0': /* compat */ 279 return (EOF); 280 281 default: /* compat */ 282 if (isupper(c)) 283 flags |= LONG; 284 c = CT_INT; 285 ccfn = (u_quad_t (*)())strtoq; 286 base = 10; 287 break; 288 } 289 290 /* 291 * We have a conversion that requires input. 292 */ 293 if (fp->_r <= 0 && __srefill(fp)) 294 goto input_failure; 295 296 /* 297 * Consume leading white space, except for formats 298 * that suppress this. 299 */ 300 if ((flags & NOSKIP) == 0) { 301 while (isspace(*fp->_p)) { 302 nread++; 303 if (--fp->_r > 0) 304 fp->_p++; 305 else if (__srefill(fp)) 306 goto input_failure; 307 } 308 /* 309 * Note that there is at least one character in 310 * the buffer, so conversions that do not set NOSKIP 311 * ca no longer result in an input failure. 312 */ 313 } 314 315 /* 316 * Do the conversion. 
317 */ 318 switch (c) { 319 320 case CT_CHAR: 321 /* scan arbitrary characters (sets NOSKIP) */ 322 if (width == 0) 323 width = 1; 324 if (flags & SUPPRESS) { 325 size_t sum = 0; 326 for (;;) { 327 if ((n = fp->_r) < width) { 328 sum += n; 329 width -= n; 330 fp->_p += n; 331 if (__srefill(fp)) { 332 if (sum == 0) 333 goto input_failure; 334 break; 335 } 336 } else { 337 sum += width; 338 fp->_r -= width; 339 fp->_p += width; 340 break; 341 } 342 } 343 nread += sum; 344 } else { 345 size_t r = fread((void *)va_arg(ap, char *), 1, 346 width, fp); 347 348 if (r == 0) 349 goto input_failure; 350 nread += r; 351 nassigned++; 352 } 353 break; 354 355 case CT_CCL: 356 /* scan a (nonempty) character class (sets NOSKIP) */ 357 if (width == 0) 358 width = (size_t)~0; /* `infinity' */ 359 /* take only those things in the class */ 360 if (flags & SUPPRESS) { 361 n = 0; 362 while (ccltab[*fp->_p]) { 363 n++, fp->_r--, fp->_p++; 364 if (--width == 0) 365 break; 366 if (fp->_r <= 0 && __srefill(fp)) { 367 if (n == 0) 368 goto input_failure; 369 break; 370 } 371 } 372 if (n == 0) 373 goto match_failure; 374 } else { 375 p0 = p = va_arg(ap, char *); 376 while (ccltab[*fp->_p]) { 377 fp->_r--; 378 *p++ = *fp->_p++; 379 if (--width == 0) 380 break; 381 if (fp->_r <= 0 && __srefill(fp)) { 382 if (p == p0) 383 goto input_failure; 384 break; 385 } 386 } 387 n = p - p0; 388 if (n == 0) 389 goto match_failure; 390 *p = 0; 391 nassigned++; 392 } 393 nread += n; 394 break; 395 396 case CT_STRING: 397 /* like CCL, but zero-length string OK, & no NOSKIP */ 398 if (width == 0) 399 width = (size_t)~0; 400 if (flags & SUPPRESS) { 401 n = 0; 402 while (!isspace(*fp->_p)) { 403 n++, fp->_r--, fp->_p++; 404 if (--width == 0) 405 break; 406 if (fp->_r <= 0 && __srefill(fp)) 407 break; 408 } 409 nread += n; 410 } else { 411 p0 = p = va_arg(ap, char *); 412 while (!isspace(*fp->_p)) { 413 fp->_r--; 414 *p++ = *fp->_p++; 415 if (--width == 0) 416 break; 417 if (fp->_r <= 0 && __srefill(fp)) 418 break; 
419 } 420 *p = 0; 421 nread += p - p0; 422 nassigned++; 423 } 424 continue; 425 426 case CT_INT: 427 /* scan an integer as if by strtoq/strtouq */ 428 #ifdef hardway 429 if (width == 0 || width > sizeof(buf) - 1) 430 width = sizeof(buf) - 1; 431 #else 432 /* size_t is unsigned, hence this optimisation */ 433 if (--width > sizeof(buf) - 2) 434 width = sizeof(buf) - 2; 435 width++; 436 #endif 437 flags |= SIGNOK | NDIGITS | NZDIGITS; 438 for (p = buf; width; width--) { 439 c = *fp->_p; 440 /* 441 * Switch on the character; `goto ok' 442 * if we accept it as a part of number. 443 */ 444 switch (c) { 445 446 /* 447 * The digit 0 is always legal, but is 448 * special. For %i conversions, if no 449 * digits (zero or nonzero) have been 450 * scanned (only signs), we will have 451 * base==0. In that case, we should set 452 * it to 8 and enable 0x prefixing. 453 * Also, if we have not scanned zero digits 454 * before this, do not turn off prefixing 455 * (someone else will turn it off if we 456 * have scanned any nonzero digits). 
457 */ 458 case '0': 459 if (base == 0) { 460 base = 8; 461 flags |= PFXOK; 462 } 463 if (flags & NZDIGITS) 464 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 465 else 466 flags &= ~(SIGNOK|PFXOK|NDIGITS); 467 goto ok; 468 469 /* 1 through 7 always legal */ 470 case '1': case '2': case '3': 471 case '4': case '5': case '6': case '7': 472 base = basefix[base]; 473 flags &= ~(SIGNOK | PFXOK | NDIGITS); 474 goto ok; 475 476 /* digits 8 and 9 ok iff decimal or hex */ 477 case '8': case '9': 478 base = basefix[base]; 479 if (base <= 8) 480 break; /* not legal here */ 481 flags &= ~(SIGNOK | PFXOK | NDIGITS); 482 goto ok; 483 484 /* letters ok iff hex */ 485 case 'A': case 'B': case 'C': 486 case 'D': case 'E': case 'F': 487 case 'a': case 'b': case 'c': 488 case 'd': case 'e': case 'f': 489 /* no need to fix base here */ 490 if (base <= 10) 491 break; /* not legal here */ 492 flags &= ~(SIGNOK | PFXOK | NDIGITS); 493 goto ok; 494 495 /* sign ok only as first character */ 496 case '+': case '-': 497 if (flags & SIGNOK) { 498 flags &= ~SIGNOK; 499 goto ok; 500 } 501 break; 502 503 /* x ok iff flag still set & 2nd char */ 504 case 'x': case 'X': 505 if (flags & PFXOK && p == buf + 1) { 506 base = 16; /* if %i */ 507 flags &= ~PFXOK; 508 goto ok; 509 } 510 break; 511 } 512 513 /* 514 * If we got here, c is not a legal character 515 * for a number. Stop accumulating digits. 516 */ 517 break; 518 ok: 519 /* 520 * c is legal: store it and look at the next. 521 */ 522 *p++ = c; 523 if (--fp->_r > 0) 524 fp->_p++; 525 else if (__srefill(fp)) 526 break; /* EOF */ 527 } 528 /* 529 * If we had only a sign, it is no good; push 530 * back the sign. If the number ends in `x', 531 * it was [sign] '0' 'x', so push back the x 532 * and treat it as [sign] '0'. 
533 */ 534 if (flags & NDIGITS) { 535 if (p > buf) 536 (void) ungetc(*(u_char *)--p, fp); 537 goto match_failure; 538 } 539 c = ((u_char *)p)[-1]; 540 if (c == 'x' || c == 'X') { 541 --p; 542 (void) ungetc(c, fp); 543 } 544 if ((flags & SUPPRESS) == 0) { 545 u_quad_t res; 546 547 *p = 0; 548 res = (*ccfn)(buf, (char **)NULL, base); 549 if (flags & POINTER) 550 *va_arg(ap, void **) = 551 (void *)(long)res; 552 else if (flags & QUAD) 553 *va_arg(ap, quad_t *) = res; 554 else if (flags & LONG) 555 *va_arg(ap, long *) = res; 556 else if (flags & SHORT) 557 *va_arg(ap, short *) = res; 558 else 559 *va_arg(ap, int *) = res; 560 nassigned++; 561 } 562 nread += p - buf; 563 break; 564 565 #ifdef FLOATING_POINT 566 case CT_FLOAT: 567 /* scan a floating point number as if by strtod */ 568 #ifdef hardway 569 if (width == 0 || width > sizeof(buf) - 1) 570 width = sizeof(buf) - 1; 571 #else 572 /* size_t is unsigned, hence this optimisation */ 573 if (--width > sizeof(buf) - 2) 574 width = sizeof(buf) - 2; 575 width++; 576 #endif 577 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 578 for (p = buf; width; width--) { 579 c = *fp->_p; 580 /* 581 * This code mimicks the integer conversion 582 * code, but is much simpler. 
583 */ 584 switch (c) { 585 586 case '0': case '1': case '2': case '3': 587 case '4': case '5': case '6': case '7': 588 case '8': case '9': 589 flags &= ~(SIGNOK | NDIGITS); 590 goto fok; 591 592 case '+': case '-': 593 if (flags & SIGNOK) { 594 flags &= ~SIGNOK; 595 goto fok; 596 } 597 break; 598 case '.': 599 if (flags & DPTOK) { 600 flags &= ~(SIGNOK | DPTOK); 601 goto fok; 602 } 603 break; 604 case 'e': case 'E': 605 /* no exponent without some digits */ 606 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 607 flags = 608 (flags & ~(EXPOK|DPTOK)) | 609 SIGNOK | NDIGITS; 610 goto fok; 611 } 612 break; 613 } 614 break; 615 fok: 616 *p++ = c; 617 if (--fp->_r > 0) 618 fp->_p++; 619 else if (__srefill(fp)) 620 break; /* EOF */ 621 } 622 /* 623 * If no digits, might be missing exponent digits 624 * (just give back the exponent) or might be missing 625 * regular digits, but had sign and/or decimal point. 626 */ 627 if (flags & NDIGITS) { 628 if (flags & EXPOK) { 629 /* no digits at all */ 630 while (p > buf) 631 ungetc(*(u_char *)--p, fp); 632 goto match_failure; 633 } 634 /* just a bad exponent (e and maybe sign) */ 635 c = *(u_char *)--p; 636 if (c != 'e' && c != 'E') { 637 (void) ungetc(c, fp);/* sign */ 638 c = *(u_char *)--p; 639 } 640 (void) ungetc(c, fp); 641 } 642 if ((flags & SUPPRESS) == 0) { 643 double res; 644 645 *p = 0; 646 res = strtod(buf, (char **) NULL); 647 if (flags & LONGDBL) 648 *va_arg(ap, long double *) = res; 649 else if (flags & LONG) 650 *va_arg(ap, double *) = res; 651 else 652 *va_arg(ap, float *) = res; 653 nassigned++; 654 } 655 nread += p - buf; 656 break; 657 #endif /* FLOATING_POINT */ 658 } 659 } 660 input_failure: 661 return (nassigned ? nassigned : -1); 662 match_failure: 663 return (nassigned); 664 } 665 666 /* 667 * Fill in the given table from the scanset at the given format 668 * (just after `['). Return a pointer to the character past the 669 * closing `]'. 
/*
 * __sccl --
 *	Build the scanset table for a %[...] conversion.
 *
 * tab	256-entry table indexed by character value.  On return tab[ch]
 *	is 1 wherever ch is to be considered part of the scanset and 0
 *	elsewhere.
 * fmt	points into the format, just after the `['.
 *
 * Returns a pointer to the character past the closing `]', or a pointer
 * to the terminating NUL if the format ends before the scanset is closed.
 *
 * A leading `^' negates the set.  The first member may be `]' (or `-')
 * without being special; the last character may be `-'.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with `<', equal endpoints
			 * would fall into the fill loop below, which marks
			 * c + 1 before comparing, and for c == 255 that
			 * store lands outside the 256-entry table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range */
				tab[++c] = v;
			} while (c < n);
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard, so go
			 * straight back for the next character with c
			 * left at the end of the range.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}