-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscheme-read.lisp
1036 lines (974 loc) · 43.6 KB
/
scheme-read.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;;; -*- mode: common-lisp; -*-
;;; Note: This uses :SKIP and :EOF as special symbols. When this
;;; Scheme is extended to support CL-style keyword syntax, these will
;;; have to be renamed to avoid confusion with reading the actual
;;; keywords :SKIP and :EOF.
(in-package #:airship-scheme)
(eval-when (:compile-toplevel :load-toplevel :execute)
(defconstant +read-base+ 10
"
Integers are read in +read-base+ unless a prefix specifies otherwise.
This is similar to CL's *read-base*, but is a constant. This might be
replaced with a variable in the future.
")
(defconstant +flonum-base+ 10
"Flonums (floating point numbers) must always be in base 10."))
(defparameter *fold-case* nil
"
Determines if `string-foldcase' should be applied by default or not.
Note that this is different than the case-inversion that is always
done for compatibility with the upcasing reader of Common Lisp. That
one is done character-by-character and does not apply to most of
Unicode, but is (hopefully) always reversible, unlike Unicode.
")
;;;; Conditions
(define-condition scheme-reader-error (error)
((%details
:initarg :details
:reader details
:initform "An error occurred in the Scheme reader"))
(:report (lambda (condition stream)
(format stream "~A" (details condition))))
(:documentation "An error in the Scheme reader, i.e. invalid syntax."))
(define-condition scheme-reader-eof-error (scheme-reader-error)
((%details
:initarg :details
:reader details
:initform nil))
(:report (lambda (condition stream)
(format stream "Unexpected EOF read")
(when (details condition)
(format stream " ~A" (details condition)))))
(:documentation "An error in the Scheme reader where an unexpected EOF was read"))
(define-condition scheme-type-error (type-error)
((%details
:initarg :details
:reader details
:initform nil))
(:report (lambda (condition stream)
(format stream "Type error")
(when (details condition)
(format stream " ~A" (details condition)))
(format stream "; expected type: ~A datum: ~A"
(type-error-expected-type condition)
(type-error-datum condition))))
(:documentation "A type error internal to the Scheme runtime."))
;;; This should come up when an identifier tries to start with an
;;; invalid Unicode character. This is in section 7.1.1 of r7rs.pdf
(defun unicode-reader-error ()
(error 'scheme-reader-error
:details #.(concatenate 'string
"An identifier cannot start with a Unicode character "
"in the general categories of Nd, Mc, or Me. To portably "
"do this, first wrap the identifier in vertical lines "
"like |foo|.")))
;;; Note: A configuration option could make this a style warning for
;;; added compatibility.
(defun whitespace-in-u8-error (whitespace?)
(error-unless whitespace?
'scheme-reader-error
:details #.(concatenate 'string
"In a strict interpretation of the R7RS-small "
"standard in section 7.1.1, whitespace between #u8 "
"and its parentheses is not permitted.")))
(defun invalid-infnan-error ()
(error 'scheme-reader-error
:details #.(concatenate 'string
"Tokens that start with \"+inf.\", \"-inf.\", "
"\"+nan.\", and \"-nan.\" that are not currently "
"valid numeric syntax are reserved for future "
"expansions to the Airship Scheme numeric syntax.")))
;;; A simple macro for a simple EOF error.
(defmacro eof-error (details)
`(error 'scheme-reader-eof-error :details ,details))
;;;; Characters and types
(deftype complex-number-separator ()
"
A complex number is either written in rectangular notation (with
either + or - as the separator) or in polar notation (with @ as the
separator).
"
`(member #\+ #\- #\@))
(define-function (complex-number-separator? :inline t) (character)
(and (typep character 'complex-number-separator) character))
(deftype delimiter ()
"Characters, or EOF, that represent a delimiter in Scheme syntax."
`(member #\Space #\Newline #\( #\) #\; #\" #\Tab :eof))
(define-function (delimiter? :inline t) (character)
"Tests to see if the character (or :eof) is a delimiter."
(and (typep character 'delimiter) character))
(define-function (%delimiter? :inline t) (stream)
"Tests to see if the next character in a stream is a delimiter."
(delimiter? (peek-char* stream)))
(define-function (%negative? :inline t) (character)
"Tests to see if the character represents negation."
(eql #\- character))
(define-function (%sign :inline t) (character)
"Returns -1 if the character represents negation; otherwise, 1."
(if (%negative? character) -1 1))
;;;; Numbers
;;; Reads an integer of the given radix
(defun read-scheme-integer (stream &optional (radix 10))
(check-type radix (integer 2 16))
(loop :for match := (read-case (stream x)
((:or (:range #\0 #\9)
(:range #\a #\f)
(:range #\A #\F))
(or (digit-char-p x radix)
(progn (unread-char x stream) nil)))
(:eof nil)
(t (unread-char x stream) nil))
:for length :from 0
:with number := 0
:while match
:do (setf number (+ match (* number radix)))
:finally (return (values number length))))
;;; Stops when the stream no longer matches the string, returning the
;;; point where it stopped.
(define-function (always :inline t) (string stream)
(let* ((i 0)
(char nil)
(match? (loop :for c* :across string
:for c := (read-char stream nil nil)
:always (progn
(incf i)
(setf char c)
(and c (char-equal c c*))))))
(when (and (not match?) char)
(unread-char char stream))
(values match? (1- i))))
;;; Reads the final character if an NaN or inf candidate.
(define-function %read-final-char (result stream first?)
(if result
(read-case (stream char)
(#\0 (let ((next-char (peek-char* stream)))
(cond
((delimiter? next-char)
result)
((char-equal #\i next-char)
(when first?
(skip-read-char stream))
(if (or (not first?) (%delimiter? stream))
(f:with-float-traps-masked t
(values (if first? (complex 0 result) result) t))
(invalid-infnan-error)))
((and first? (complex-number-separator? next-char))
result)
(t
(invalid-infnan-error)))))
(t (invalid-infnan-error)))
(invalid-infnan-error)))
;;; Reads the exponent of a NaN or infinite flonum.
(defun read-exponent* (exact? unread-if-no-match? stream)
(read-case (stream exponent-char)
((:or #\e #\E) (if exact? 'integer 'double-float))
((:or #\d #\D) 'double-float)
((:or #\f #\F) 'single-float)
((:or #\l #\L) 'long-float)
((:or #\s #\S) 'short-float)
(t
(when (and unread-if-no-match?
(not (eql exponent-char :eof)))
(unread-char exponent-char stream))
nil)))
;;; Reads a NaN candidate, either as a NaN or as an identifier.
;;;
;;; As an extension, the exponentiation suffix is permitted (with 0 as
;;; the only allowed exponent) as a way to get a NaN of a different
;;; floating point type.
(defun %read-nan (sign-prefix stream no-symbol? first?)
(let ((negate? (%negative? sign-prefix))
(string "nan.0"))
(multiple-value-bind (match? index) (always string stream)
(let ((next-char (peek-char* stream)))
(cond ((not match?)
(if (< index (- (length string) 1))
(progn
(error-when no-symbol?
'scheme-reader-error
:details "Invalid numerical syntax.")
(read-scheme-symbol stream
:prefix (format nil
"~A~A"
sign-prefix
(subseq string 0 index))))
(invalid-infnan-error)))
((delimiter? next-char)
(nan 'double-float negate?))
((char-equal #\i next-char)
(when first?
(skip-read-char stream))
(if (or (not first?) (%delimiter? stream))
(f:with-float-traps-masked t
(let ((result (nan 'double-float negate?)))
(values (if first? (complex 0 result) result) t)))
(invalid-infnan-error)))
((and first? (complex-number-separator? next-char))
(nan 'double-float negate?))
(t
(%read-final-char (nan (read-exponent* nil nil stream) negate?) stream first?)))))))
;;; Reads an inf candidate, either as a trivial imaginary number, a
;;; floating point infinity, or as an identifier.
;;;
;;; As an extension, the exponentiation suffix is permitted (with 0 as
;;; the only allowed exponent) as a way to get an infinity of a
;;; different floating point type.
(defun %read-inf-or-i (sign-prefix stream no-symbol? first?)
(let ((negate? (%negative? sign-prefix))
(string "inf.0"))
(skip-read-char stream)
(if (%delimiter? stream)
(values (complex 0 (if negate? -1 1)) t)
(multiple-value-bind (match? index) (always (subseq string 1) stream)
(let ((next-char (peek-char* stream)))
(cond ((not match?)
(if (< index (- (length string) 2))
(progn
(error-when no-symbol?
'scheme-reader-error
:details "Invalid numerical syntax.")
(read-scheme-symbol stream
:prefix (format nil
"~A~A"
sign-prefix
(subseq string 0 (1+ index)))))
(invalid-infnan-error)))
((delimiter? next-char)
(inf 'double-float negate?))
((char-equal #\i next-char)
(when first?
(skip-read-char stream))
(if (or (not first?) (%delimiter? stream))
(f:with-float-traps-masked t
(let ((result (inf 'double-float negate?)))
(values (if first? (complex 0 result) result) t)))
(invalid-infnan-error)))
((and first? (complex-number-separator? next-char))
(inf 'double-float negate?))
(t
(%read-final-char (inf (read-exponent* nil nil stream) negate?) stream first?))))))))
;;; Reads a numeric sign if present.
(defun %read-sign (stream)
(case (peek-char* stream)
(:eof (eof-error "when a number was expected"))
((#\+ #\-) (read-char stream))
(t nil)))
;;; Checks the radix if the number is to be read as a flonum.
(defun check-flonum-radix (radix)
(error-unless (= +flonum-base+ radix)
'scheme-reader-error
:details (format nil
"A literal flonum must be in base ~D."
+flonum-base+)))
;;; Reads the exponent part of a flonum, after the exponent character
;;; is read.
(defun %read-exponent (number radix float-type stream)
(check-flonum-radix radix)
(let ((sign (%sign (%read-sign stream))))
(multiple-value-bind (number* length*) (read-scheme-integer stream radix)
(error-when (zerop length*)
'scheme-reader-error
:details "An exponent was expected but none was provided")
(* (if float-type
(coerce number float-type)
number)
(expt +flonum-base+ (* number* sign))))))
;;; Reads the exponent of a flonum.
(defun read-exponent (number radix exact? stream)
(let ((float-type (read-exponent* exact? t stream)))
(if float-type
(%read-exponent number
radix
(if (eql float-type 'integer)
nil
float-type)
stream)
number)))
;;; Reads a possible suffix for a number.
(defun %read-scheme-number-suffix (number radix exact? stream)
(read-case (stream match)
(#\/
(multiple-value-bind (number* length*) (read-scheme-integer stream radix)
(error-when (zerop length*)
'scheme-reader-error
:details "A fraction needs a denominator after the / sign.")
(/ number number*)))
(#\.
(check-flonum-radix radix)
(multiple-value-bind (number* length*) (read-scheme-integer stream radix)
(let ((number (+ number (/ number* (expt (double-float* +flonum-base+) length*)))))
(read-exponent number radix exact? stream))))
(:eof number)
(t
(unread-char match stream)
(read-exponent number radix exact? stream))))
;;; Reads a number that isn't a NaN or infinity.
(defun %read-regular-scheme-number (radix sign-prefix exact? stream)
(multiple-value-bind (number length) (read-scheme-integer stream radix)
;; A leading decimal point implicitly has a 0 in front.
;; Otherwise, no number at the start is an error.
(let ((next-char (peek-char* stream)))
(error-when (and (zerop length)
(not (eql #\. next-char)))
'scheme-reader-error
:details "No number could be read when a number was expected.")
(when (and (zerop length) (eql #\. next-char))
(setf (values number length) (values 0 1))))
(let* ((sign (%sign sign-prefix))
(number (* sign
(if (%delimiter? stream)
number
(%read-scheme-number-suffix number radix exact? stream)))))
number)))
(defun %read-infnan-or-regular-number (first? next-char radix sign-prefix stream no-symbol? exact?)
(cond ((and sign-prefix (char-equal next-char #\n))
;; Reads NaN or a symbol.
(%read-nan sign-prefix stream no-symbol? first?))
((and sign-prefix (char-equal next-char #\i))
;; Reads +i, -i, inf, or a symbol.
(%read-inf-or-i sign-prefix stream no-symbol? first?))
((or (digit-char-p next-char radix)
(eql next-char #\.))
;; Reads a number like 4 or .4
(%read-regular-scheme-number radix sign-prefix exact? stream))
;; For symbols that begin with + or -, such as
;; CL-style constant names, e.g. +foo+, excluding +
;; or - themselves (the first case in the COND).
((and first? sign-prefix)
(error-when no-symbol?
'scheme-reader-error
:details "Invalid numerical syntax.")
(read-scheme-symbol stream :prefix (make-string 1 :initial-element sign-prefix)))
;; Everything else is an error here.
;;
;; Note: This won't error on e.g. "inf" or "nan"
;; without the prefix because those should be read
;; as a symbol, not as a potential number.
(t
(error 'scheme-reader-error
:details (format nil
"Failure to read a number when reading ~A"
next-char)))))
;;; Reads a Scheme number in the given radix. If end? then it must be
;;; the end of the stream after reading the number.
;;;
;;; Note: Instead of an error, most failed candidates of a number
;;; could be read as a symbol, like in CL and Racket. This is
;;; potentially still valid as a symbol in R7RS-small if it began with
;;; a . instead of a number, such as .1foo
;;;
;;; Note: This extends the syntax by permitting an imaginary number to
;;; exist without a sign prefix in certain cases, e.g. "4i".
(defun %read-scheme-number (stream radix &optional no-symbol? exact?)
;; A complex number has two different ways to have a second part:
;;
;; {first}+{second}i or {first}-{second}i
;; {first}@{second}
(flet ((first-part (stream)
(let ((sign-prefix (%read-sign stream))
(next-char (peek-char* stream)))
(if (delimiter? next-char)
(intern (string sign-prefix))
(%read-infnan-or-regular-number t next-char radix sign-prefix stream no-symbol? exact?))))
;; Note: Ending in a delimiter means there is no second part.
;; Ending in an #\i means that the "first" part was really
;; the second part.
;;
;; Note: Some edge case potential symbols are currently
;; errors because of the complex syntax, e.g. +inf.0@-inf.,
;; which looks like a valid infnan for most of the way.
(second-part (number stream)
(multiple-value-bind (number delimiter?)
(if (%delimiter? stream)
(values number t)
(values
(read-case (stream match)
((:or #\i #\I)
(complex 0 number))
(#\@
(let* ((sign-prefix* (%read-sign stream))
(next-char (peek-char* stream))
(number* (%read-infnan-or-regular-number nil next-char radix sign-prefix* stream t exact?)))
(f:with-float-traps-masked t
(* (if (rationalp number) (double-float* number) number)
(cis (if (rationalp number) (double-float* number*) number*))))))
((:or #\+ #\-)
(let* ((sign-prefix* match)
(next-char (peek-char* stream))
(number* (%read-infnan-or-regular-number nil next-char radix sign-prefix* stream t exact?)))
(f:with-float-traps-masked t
(read-case (stream match)
((:or #\i #\I)
(complex number number*))
;; If the second part was already
;; processed as just an #\i then number*
;; is already a complex number.
(t
(error-unless (and (complexp number*)
(zerop (realpart number*))
(zerop (imagpart number)))
'scheme-reader-error
:details "Invalid numerical syntax.")
(complex number (imagpart number*)))))))
(t
(error 'scheme-reader-error
:details "Invalid numerical syntax.")))
(%delimiter? stream)))
(error-unless delimiter?
'scheme-reader-error
:details "Invalid numerical syntax.")
number)))
;; Note: These aren't necessarily the real and imaginary part.
;; Also, imaginary-first-part? doesn't detect all cases of an
;; imaginary first part because most of the checks for a trailing
;; i happens in the function second-part.
(multiple-value-bind (first-part imaginary-first-part?) (first-part stream)
(if (or imaginary-first-part? (symbolp first-part))
first-part
(second-part first-part stream)))))
;;; Reads a number for the Scheme reader or string-to-number.
(defun read-scheme-number (stream &optional (radix 10))
(let* ((next-char (peek-char* stream))
(possible-number (if (eql next-char #\#)
(progn
(skip-read-char stream)
(%read-special stream :radix radix))
(%read-scheme-number stream radix t))))
(if (numberp possible-number)
possible-number
nil)))
;;; Converts a string to a number using Scheme's numeric syntax. This
;;; is used for string->number.
(defun string-to-number (string &optional (radix 10))
(with-input-from-string (in string)
(handler-case (read-scheme-number in radix)
(scheme-reader-error nil))))
;;;; Misc reader syntax
;;; A line comment skips the rest of the stream unless there is a
;;; newline that ends the comment..
(defun read-line-comment (stream)
(loop :for match := (read-case (stream c)
(#\Newline :newline)
(:eof :eof)
(t nil))
:until match
:finally (return :skip)))
;;; A block comment comments everything between #| and |# and allows
;;; these to be nested. The final |# exits the block comment.
(defun read-block-comment (stream)
(loop :for prior-match := nil :then match
:for match := (read-case (stream c)
(#\| :pipe)
(#\# :special)
(:eof nil)
(t t))
:until (or (not match)
(and (eql prior-match :pipe)
(eql match :special)))
;; Nested block comments must also match
:when (and (eql prior-match :special)
(eql match :pipe))
:do (read-block-comment stream)
:finally (if match
(return :skip)
(eof-error "inside of a block comment"))))
;;; The standard supports these escape characters in strings and a few
;;; other places. For instance, \n becomes a newline.
(define-function (%one-char-escape :inline t) (char stream)
(case char
(#\n (code-char #x000a))
(#\t (code-char #x0009))
(#\a (code-char #x0007))
(#\b (code-char #x0008))
(#\r (code-char #x000d))
;; The main special case is with hex escapes, which are
;; "#\x{number};" where {number} is in base 16. Notice the
;; semicolon terminator.
;;
;; Note: This skips the semicolon, which isn't normally skipped
;; because delimiters aren't skipped.
(#\x
(prog1 (%read-hex-character stream #\;)
(skip-read-char stream)))
;; The other special case is \{whitespace}*{newline} because it
;; needs to skip any whitespace between the slash and the newline,
;; but is only valid if it's only whitespace.
;;
;; For now this just uses the simplified Space-or-Tab whitespace.
((#\Space #\Tab #\Newline)
(if (char= char #\Newline)
nil
(loop :with whitespace? := nil
:for c := (read-case (stream c)
(#\Newline t)
((:or #\Space #\Tab)
(setf whitespace? t)
nil)
(:eof (eof-error "when a newline was expected"))
(t (error 'scheme-reader-error
:details (format nil "A newline expected, but ~A was read." c))))
:until c
:finally (return nil))))
;; Note: \", \\, and \| are specified. The rest are unspecified,
;; but use the CL approach of returning the character itself
;; rather than having an error. That's what this path represents.
(t char)))
;;; Reads a string starting after the initial " that enters the string
;;; reader. A string must end on a non-escaped ".
(defun %read-string (stream)
(loop :for match := (read-case (stream x)
(:eof nil)
(t x))
:for after-escape? := nil :then escape?
:for escape? := (and (eql match #\\)
(not after-escape?))
:with buffer := (make-adjustable-string)
:until (or (not match)
(and (not after-escape?)
(eql match #\")))
:unless escape?
:do (let ((result (if after-escape?
(%one-char-escape match stream)
match)))
(when result
(vector-push-extend result buffer)))
:finally (return (if match
(subseq buffer 0 (fill-pointer buffer))
(eof-error "inside of a string")))))
;;; Determine which base the number is in based on a literal syntax.
;;; For example, #x means that it's in hexadecimal.
(defun %find-read-base (stream &optional (radix 10))
(let ((next-char (peek-char* stream)))
(case next-char
(#\#
(skip-read-char stream)
(read-case (stream match)
((:or #\b #\B) 2)
((:or #\o #\O) 8)
((:or #\d #\D) 10)
((:or #\x #\X) 16)
(:eof (eof-error "after # when a radix was expected"))
(t (error 'scheme-reader-error
:details (format nil "#~A is not a radix" match)))))
(:eof (eof-error "when a number was expected"))
(t radix))))
;;; Reads a number that has a provided base, such as #x for
;;; hexadecimal. If it is followed by #e then it is read as an exact
;;; (non-float) and if it is followed by #i then it is read as an
;;; inexact (float).
(defun %read-in-base (stream base)
(let* ((next-char (peek-char* stream))
(exact? nil)
(exactness (case next-char
(#\#
(skip-read-char stream)
(read-case (stream match)
((:or #\e #\E)
(setf exact? t)
#'exact)
((:or #\i #\I)
#'inexact)
(:eof (eof-error "after # when either E or I was expected"))
(t (error 'scheme-reader-error
:details (format nil "#~A is not an exactness/inexactness" match)))))
(:eof
(eof-error "when a number was expected"))
(t
nil)))
(number (%read-scheme-number stream base t exact?)))
(if exactness
(funcall exactness number)
number)))
(defun %read-hex-character (stream &optional delimiter-char)
(multiple-value-bind (number length)
(read-scheme-integer stream 16)
(error-when (or (zerop length)
(if delimiter-char
(not (eql #\; (peek-char* stream)))
(not (%delimiter? stream))))
'scheme-reader-error
:details "Invalid hexadecimal number.")
(code-char number)))
;;; Literal reader syntax for a character. This is either one
;;; character, like #\a, or it is a hex escape, like #\x42, or it is a
;;; named character, like #\newline.
(defun %read-literal-character (stream)
(read-case (stream c)
((:or #\x #\X)
(if (%delimiter? stream)
c
(%read-hex-character stream)))
(t
(if (%delimiter? stream)
c
(progn
(unread-char c stream)
;; Note: There might be non-interning ways to do this, but
;; this way is extensible.
(case (read-scheme-symbol stream :package (find-package '#:airship-scheme))
(alarm (code-char #x0007))
(backspace (code-char #x0008))
(delete (code-char #x007f))
(escape (code-char #x001b))
(newline (code-char #x000a))
(null (code-char #x0000))
(return (code-char #x000d))
(space (code-char #x0020))
(tab (code-char #x0009))
(t
(error 'scheme-reader-error
:details "Currently, Airship Scheme only supports the required character names."))))))))
(defun %read-directive (stream)
(let ((directive (read-scheme-symbol stream :*fold-case* t)))
(cond ((eql 'fold-case directive)
(setf *fold-case* t)
:skip)
((eql 'no-fold-case directive)
(setf *fold-case* nil)
:skip)
(t (error 'scheme-reader-error
:details (format nil
"Unrecognized directive: #!~A"
(invert-case (symbol-name directive))))))))
;;; This is for #-prefixed tokens that are a number or an error.
(defun %read-special (stream &key (radix 10) first? inside-list? quoted? labels)
(read-case (stream x)
((:or #\e #\E)
(let ((read-base (%find-read-base stream radix)))
(exact (%read-scheme-number stream read-base t t))))
((:or #\i #\I)
(let ((read-base (%find-read-base stream radix)))
(inexact (%read-scheme-number stream read-base t))))
((:or #\b #\B)
(%read-in-base stream 2))
((:or #\o #\O)
(%read-in-base stream 8))
((:or #\d #\D)
(%read-in-base stream 10))
((:or #\x #\X)
(%read-in-base stream 16))
(#\!
(%read-directive stream))
((:range #\0 #\9)
(unread-char x stream)
(let ((label-id (read-scheme-integer stream 10)))
(read-case (stream x)
(#\=
(unless labels
(setf labels (make-hash-table)))
(error-when (hash-table-value-present? label-id labels)
'scheme-reader-error
:details (format nil "The label ~D appears more than once" label-id))
(let ((labeled (read-scheme stream
:inside-list? inside-list?
:quoted? quoted?
:first? first?
:labels labels)))
(error-when (eql labeled :dot)
'scheme-reader-error
:details "Attempted to label a dot")
(error-when (eql labeled :eof)
'scheme-reader-eof-error
:details "when expecting something to label")
(error-when (eql labeled #\))
'scheme-reader-error
:details "Nothing left in list to label")
(setf (gethash label-id labels) labeled)))
(#\#
(error-unless (%delimiter? stream)
'scheme-reader-error
:details "Invalid label syntax")
(error-unless (hash-table-value-present? label-id labels)
'scheme-reader-error
:details (format nil "Attempted to use label ~D before defining it" label-id))
(gethash label-id labels))
(:eof
(eof-error "in the middle of a label"))
(t
(error 'scheme-reader-error
:details "Invalid label syntax")))))
(t
(error 'scheme-reader-error
:details (format nil "Reader syntax #~A is not supported!" x)))))
(defun %read-bytevector* (labels stream)
(handler-case (coerce (read-scheme-list stream :labels labels) 'bytevector?)
(type-error (c)
(error 'scheme-type-error
:details "in reading a bytevector"
:datum (type-error-datum c)
:expected-type 'octet))))
;;; Arbitrary whitespace between #u8 and its parentheses is not
;;; required to be supported.
;;;
;;; Producing this non-conforming syntax is the default behavior in
;;; paredit for Emacs.
;;;
;;; i.e. Paredit produces #u8 () when only #u8() conforms to a strict
;;; reading of section 7.1.1 of R7RS-small.
;;;
;;; If you get this error because of paredit, then you can resolve
;;; this by adding the following to your .emacs file:
;;;
;;; (setq paredit-space-for-delimiter-predicates '((lambda (endp delimiter) nil)))
(defun %read-bytevector (labels stream)
(read-case (stream character)
(#\8 (loop :with whitespace? := nil
:for c := (read-case (stream c)
((:or #\Space #\Tab #\Newline)
(whitespace-in-u8-error whitespace?)
(setf whitespace? t)
nil)
(#\( t)
(:eof (eof-error "when a \"(\" was expected"))
(t (error 'scheme-reader-error
:details (format nil "\"(\" expected, but ~A was read." c))))
:until c
:finally (return (%read-bytevector* labels stream))))
(:eof (eof-error "after #u when an 8 was expected"))
(t (error 'scheme-reader-error
:details (format nil "#u8 expected, but #u~A was read." character)))))
;;; Handles the #;-style comments for `read-scheme-character'.
(defun comment-next-form (inside-list? quoted? first? labels stream)
(let ((skipped-read (read-scheme stream
:inside-list? inside-list?
:quoted? quoted?
:first? first?
:labels labels)))
(case skipped-read
(:dot (error 'scheme-reader-error
:details "Attempted to comment out a dot."))
(#\) (error 'scheme-reader-error
:details "Expected to skip a token to match a #;-style comment, but none found."))
(:eof (eof-error "after a #;-style comment"))))
(read-scheme stream
:inside-list? inside-list?
:quoted? quoted?
:first? first?
:labels labels))
;;; Reads a token that starts with a # (hashtag).
(defun read-special (inside-list? quoted? first? labels stream)
(read-case (stream x)
(#\|
(read-block-comment stream))
((:or #\t #\T)
(if (or (%delimiter? stream)
(and (always "rue" stream)
(%delimiter? stream)))
t
(error 'scheme-reader-error
:details "Invalid character(s) after #t")))
((:or #\f #\F)
(if (or (%delimiter? stream)
(and (always "alse" stream)
(%delimiter? stream)))
%scheme-boolean:f
(error 'scheme-reader-error
:details "Invalid character(s) after #f")))
((:or #\u #\U)
(%read-bytevector labels stream))
(#\\
(%read-literal-character stream))
(#\(
(coerce (read-scheme-list stream :labels labels) 'vector?))
(#\;
(comment-next-form inside-list? quoted? first? labels stream))
(:eof
(eof-error "after a # when a character was expected"))
(t
(unread-char x stream)
(%read-special stream
:first? first?
:inside-list? inside-list?
:quoted? quoted?
:labels labels))))
;;; Reads a Scheme symbol that is escaped with the literal ||
;;; notation, like |foo|.
(defun read-escaped-scheme-symbol (stream &key (package *package*) (*fold-case* *fold-case*))
(loop :for after-escape? := nil :then escape?
:for char := (read-case (stream c)
(#\| (if after-escape? c nil))
(#\\ (if after-escape? c :escape))
(:eof (eof-error "inside of a |"))
(t (let ((result (if after-escape?
(%one-char-escape c stream)
c)))
(or result :skip))))
:for escape? := (eql char :escape)
:with buffer := (make-adjustable-string)
:while char
:unless (not (characterp char))
:do (vector-push-extend char buffer)
:finally (return
(let ((result (subseq buffer 0 (fill-pointer buffer))))
(when *fold-case*
(setf result (string-foldcase result)))
(map-into result #'%invert-case result)
(intern result package)))))
;;; Reads until the delimiter and turns it into a Scheme symbol.
(defun read-scheme-symbol (stream &key (package *package*) prefix (*fold-case* *fold-case*))
(check-type prefix sequence)
(loop :for char := (read-case (stream c)
(#\(
(warn #.(concatenate 'string
"Style warning: There should be a space before "
"a \"(\" if it is directly following a symbol."))
(unread-char c stream)
nil)
((:or #\" #\Space #\Newline #\) #\; #\Tab)
(unread-char c stream)
nil)
(:eof nil)
(t c))
:with buffer := (if prefix
(make-array (length prefix)
:element-type 'character
:adjustable t
:fill-pointer (length prefix)
:initial-contents (map 'string #'%invert-case prefix))
(make-adjustable-string))
:while char
:do (vector-push-extend char buffer)
:finally (return (let ((result (subseq buffer 0 (fill-pointer buffer))))
(when *fold-case*
(setf result (string-foldcase result)))
(map-into result #'%invert-case result)
(intern result package)))))
;;;; Core syntax
;;; A dot can represent a possible number (if not, it's a symbol), a
;;; part of a dotted list, or the start of a symbol.
(defun read-scheme-dot (match inside-list? quoted? first? stream)
(let ((next-char (peek-char* stream)))
(cond ((eql :eof next-char)
(eof-error "after a dot"))
((digit-char-p next-char)
(unread-char match stream)
(%read-scheme-number stream +flonum-base+))
((delimiter? next-char)
(error-unless inside-list?
'scheme-reader-error
:details "The dotted list syntax must be used inside of a list")
(error-when quoted?
'scheme-reader-error
:details "A dot cannot directly follow a quote")
(error-when first?
'scheme-reader-error
:details "An expression needs an item before the dot in a dotted list")
:dot)
(t
(unread-char match stream)
(read-scheme-symbol stream)))))
(defun read-quoted (quote-name inside-list? first? labels stream)
(let ((quoted (read-scheme stream
:inside-list? inside-list?
:quoted? t
:first? first?
:labels labels)))
(error-when (and inside-list? (eql quoted #\)))
'scheme-reader-error
:details "Nothing quoted!")
(error-when (eql quoted :eof)
'scheme-reader-eof-error
:details "after a quote")
`(,quote-name ,quoted)))
;;; Reads a character and determines what to do with it based on the
;;; Scheme syntax specification.
(defun %read-scheme-character (inside-list? quoted? first? labels stream)
(read-case (stream match)
(#\( (read-scheme-list stream :labels labels))
(#\)
(error-unless inside-list?
'scheme-reader-error
:details "Imbalanced parentheses")
#\))
(#\" (%read-string stream))
;; Note: If +read-base+ is not constant, then the digit range
;; would depend on it.
((:or (:range #\0 #\9) #\- #\+)
(unread-char match stream)
(%read-scheme-number stream +read-base+))
((:or #\Newline #\Space #\Tab) :skip)
(#\# (read-special inside-list? quoted? first? labels stream))
(#\' (read-quoted 'quote inside-list? first? labels stream))
(#\` (read-quoted 'quasiquote inside-list? first? labels stream))
(#\, (read-quoted (if (eql #\@ (peek-char* stream))
(progn
(skip-read-char stream)
'unquote-splicing)
'unquote)
inside-list?
first?
labels
stream))
(#\; (read-line-comment stream))
(#\| (read-escaped-scheme-symbol stream))
(:eof
(error-when inside-list?
'scheme-reader-error
:details "Imbalanced parentheses")
:eof)
(#\. (read-scheme-dot match inside-list? quoted? first? stream))
((:or :nd :mc :me)
;; Note: Many Schemes disregard this rule, but this is mandated
;; by section 7.1.1 of r7rs.pdf.
(unicode-reader-error))
(t
(unread-char match stream)
(read-scheme-symbol stream))))
(defun read-scheme-list (stream &key labels)
"Reads a list, with special handling of the dotted list syntax."
(loop :with s-expression := (list)
:with last := (list)
:for match := (read-scheme stream
:inside-list? t
:first? (endp s-expression)
:labels labels)
:for after-dot? := nil :then (or dot? after-dot?)
:for dot? := (eql match :dot)
:with already-dotted? := nil
:until (eql match #\))
:do
(progn
(error-when (and after-dot? dot?)
'scheme-reader-error
:details "More than one dot inside of a dotted list")
(unless dot?
(error-when already-dotted?
'scheme-reader-error
:details "More than one item after a dot in a dotted list")
(let ((new-cons (if after-dot?
(progn
(setf already-dotted? t)