-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathspanio.c
2021 lines (1704 loc) · 69.2 KB
/
spanio.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* #libraryintro
The spanio library.
We use spanio methods when possible and only use null-terminated C strings at interface boundaries where there is no way around it, see the s() pattern.
When we say "print" we mean what prt() does, which is append output to the output span out; it has the printf interface, i.e. format string followed by varargs.
You must flush() before the output will be printed to stdout and be visible to the user.
A common cmpr pattern is prt, flush, getch.
To "complain and exit" means prt, flush, exit(n>0).
A span has a start pointer and an end pointer, called .buf and .end respectively.
- empty(span): If a span is empty (start and end pointers are equal).
- len(span): The length of a span. Prefer this over less clear .end minus .buf.
- init_spans(): Init global spans and buffers; called only from main().
- prt(char *, ...): Formats and appends a string to the output span, i.e. prints it. Pronounced as prt.
- prs(char *, ...): Same as prt, but returns a span (allocated in cmp space).
- w_char(char): Writes a single character.
- wrs(span): Writes the contents of a span.
- bksp(): Backspace, shortens the output span by one.
- sp(): Appends a space character to the output span, i.e. prints a space.
- terpri(): Prints a newline (name courtesy Common Lisp).
- out_sav out2cmp(), out_rst(out_sav): redirect all output functions to cmp (instead of out) and then undo (reset) back to the given opaque state representation.
- flush(), flush_err(): Flushes the output span to standard output or standard error.
- write_to_file_span(span content, span path, int clobber): Write a span to a file, optionally overwriting.
- write_to_file(span, const char*): Deprecated.
- readable_file(span): whether a file exists as a regular file readable by us.
- read_file_into_span(char*, span): Reads the contents of a file into a span. Deprecated.
- read_file_S_into_span(span, span): Read the contents of a file $1 into a span $2. Used in new code. Returns a span prefix of $2.
- read_file_into_cmp(span): Filename as a span, returns contents as a span inside cmp space.
- read_file_into_inp(span): Filename as a span, returns contents as a span inside inp space.
- advance1(span*), advance(span*, int): Advances the start pointer of a span by one or a specified number of characters.
- shorten1(span*), shorten(span*, int): Shortens a span by one or by a given number of characters.
- find_char(span, char): Searches for a character in a span and returns its first index or -1; find_char_rev(span,char) finds the last index or -1.
- contains(span, span): Checks if one span TEXTUALLY contains another; "abc b"; O(n) string search.
- contains_ptr(span, span): Checks if one span PHYSICALLY contains another; "[[]]"; O(1) pointer comparisons.
- starts_with(span, span): Check if $2 is textual prefix of (or equal to) $1 (mnemonic for arg order: $1 starts-with $2).
- ends_with(span, span): Check if $1 ends with $2.
- consume_prefix(span, span*): Shortens a span by a prefix if present, returning that prefix or nullspan().
- first_n(span, int): Returns n leading chars of a span.
- skip_n(span, int): Returns a new span skipping n initial chars.
- take_n(int, span*): Returns as a new span the first n characters from a span, mutating it; often used when parsing.
- next_line(span*): Extracts the next line (up to \n or .end) from a span and returns it as a new span.
- span_eq(span, span), span_cmp(span, span): Compares two spans for equality or lexicographical order.
- S(char*): Creates a span from a null-terminated string.
- char* s(span): Returns a null-terminated string (in cmp space) containing the given contents.
- char* s_buffer(char*,int,span): Copies $3 into $1 (of length $2) and null-terminates it, returning $1 for convenience.
- nullspan(): Returns the empty span at address 0.
- index_of(span,spans): Return first element of $2 which is span_eq $1, or -1 if none match.
- spanspan(span, span): Finds the first occurrence of a span within another span and returns a span into haystack.
- trim(span): Gives the possibly smaller span with any isspace(3) trimmed on both sides.
- split_whitespace(span): split a span into tokens on whitespace
- concat(span,span): Returns a new span (in cmp space) containing a concatenation.
- parse_int(span): Parse an int, but without altering the span.
- parse_hex(span): Parse a hex value, without altering the span.
- scan_int(span*), scan_hex(span*): Similar, but advances the span past the parsed value.
typedef struct { u8* buf; u8* end; } span; // the type of span
*/
/* #spanio_advanced
These should probably be documented separately.
- skip_whitespace(span*): modifies a span, returning a prefix span of zero or more removed whitespace.
- split_commas_ws(span): splits a span into spans on commas, stripping whitespace
- w_char_esc(char), w_char_esc_pad(char), w_char_esc_dq(char), w_char_esc_sq(char), wrs_esc(): Write characters (or for wrs_esc, spans) to the output span, applying various escape sequences.
*/
/**/
#define _GNU_SOURCE
#include "siphash/siphash.h"
#include <dirent.h>
typedef uint64_t u64; // 64-bit unsigned shorthand; we should probably put all these integer typedefs in one place
/* Flush pending output, then exit with status n.
   The do/while(0) wrapper makes the expansion a single statement, so the macro
   behaves as expected under an unbraced if/else (previously only flush() was
   guarded and exit(n) ran unconditionally).
   Used only by handle_args; let's do this differently. */
#define flush_exit(n) do { flush(); exit(n); } while (0)
/* #span_ret
The span ret pattern is a common idiom in functions returning span.
Instead of collecting the start and end of the span in separate variables and then constructing a span value to return at the end, we instead declare a span variable called "ret" at the top, and then set the .buf and .end separately, wherever it is convenient to do so (not necessarily in that order), and whenever both have been set, the value is ready and can be returned or used.
*/
/* #const
Note that we NEVER write const in C, as this feature doesn't pull its weight.
There's some existing contamination around library functions but try to minimize the spread.
*/
/* #prt_usage
Note that prt() has exactly the same function signature as printf, i.e. it takes a format string followed by varargs.
We never use printf, but always prt.
A common idiom when reporting errors is to call prt, flush_err, and exit.
To prt a span x we use %.*s with len(x) and x.buf.
If the span would be the only thing in the formatting string, just use wrs(x) (maybe with terpri() after if you need a newline).
*/
/* #span_usage
A span is only two u8* elements, .buf and .end.
Always use len() to get the length of a span.
A common idiom is next_line() in a loop with !empty().
As next_line() leaves out the newline, you can implement cat by using a next_line + empty loop with wrs and terpri in the body.
In a next_line loop, to tell if you are on the last line, since next_line() has already removed the line you are processing, you can test whether the thing you are consuming is empty; if it is, the line next_line gave you was the last line.
*/
/* #spanio_initialization
@- TODO: fill this out (with arenas and whatever else).
In main() or similar it is common to call init_spans and often also read_and_count_stdin.
*/
/* #thran
@- experimental, may go away
A thran has three pointers and can be addressed as two spans which share an endpoint; it is naturally used internally for things like buffers, pipes, and in general anywhere where information is being consumed linearly (usually left-to-right, i.e. ascending addresses in memory, but could be in reverse), for example in parsing.
You can think of it as a span with a progress bar.
typedef struct { u8* buf; u8* end; u8* p; } thran; // a thran holds buf and end but also .p (pointer (or progress))
- thran_of(span): the pointer always refers to some location in between buf and end, here it will be set equal to buf.
- thran_a(thran): returns the "a" part of a thran, i.e. the part up to the pointer (e.g. empty(thran_a(thran_of(x))) for any x).
- thran_b(thran): returns the "b" part, after the pointer, (span_eq (thran_b (thran_of x)) x) is true for any span x.
- thran_full(thran): the dual of thran_of, returns both parts of the thran as a span (discarding the .p information).
*/
/* #generic_array
We have a generic array implementation using arena allocation.
- T will have .a of type E*, and .n, and .cap of type size_t.
- T_alloc(N) returns an array of type T, with .n = 0, .cap = N.
- T_arena_push() and T_arena_pop() manage arena allocation stack; use them as directed.
- Use T_push(T*,E) to push an element onto an array.
*/
/* #spans @generic_array
Our generic array is used to declare a spans type and the associated functions.
*/
/* #s_pattern
Note that in general our spans are NOT null-terminated, so casting a span.buf to a char* and hoping for the best in calling C library functions would be very wrong.
When we need a null-terminated C string for talking to library functions, we can use s_buffer().
We use a local buffer of some suitably generous size, according to the use case.
For example, when used for a path name we should use PATH_MAX.
Here is an example using a size of 2048:
```
char buf[2048] = {0};
s_buffer(buf,2048,some_span);
... use 'buf' ...
```
*/
/* #jsonlib
JSON support in the spanio library.
- json_s(span): prt a JSON string (double-quoted and escaped appropriately).
- json_n(f64): prt a double in JSON format.
- json_b(int): prt a true or false (only 0 is false).
- json_0(): prt a json_null value ("null").
- json_o(): prt an empty json object.
- json_o_extend(json*,span,json): extends $1 with key $2 and value $3.
- json_a(): prt an empty json array.
- json_a_extend(json*,json): extends array $1 with value $2.
- all the above json constructor functions return the json type (which they also prt, usually this is sent to cmp space) as in the _{s,n,b,0,o,a} constructors.
- json_{s,n,b,0,o,a}p: full list of json_?p predicate funcs, used to distinguish types of json values.
- (for example) json_sp(json): 1 if $1 is a string, otherwise 0.
- json_key(span, json): lookup on json object.
- json_index(int, json): lookup on json array.
- above lookup functions return a "nullable json".
- mnemonic: the argument order was inspired by partial application.
- int json_is_null(json): returns 0 or 1.
- json_un_s(json): return a span containing the actual value of a json string value (e.g. from json_key or json_index).
- json_parse(span): parse a span into a json and return it; may be shorter only by trimmed whitespace; commonly used.
- make_json(span): return a json wrapper of the span in O(1); the span must be known to be valid json already; rarely used.
- json_s2s(json,span*,u8*): converts json string $1 into an unquoted string in $2 (not exceeding buffer end $3); returns a span.
- json_parse_prefix(span*): not usually called directly, but can be used to parse a json value off the front of a buffer, shortening it.
The json type is a wrapped span which actually contains a JSON-formatted string, allocated in cmp space.
Every json value wraps a span .s, which can be accessed directly whenever the string value of the json is needed, for example when sending as JSON over the wire.
There are constructor functions for all the primitive types, and for the collection types, array and object, there are constructors for the empty collections and extend functions to extend them.
These extend them in place, and are intended for relatively simple applications like building a message for an API call.
There are predicate functions for the json type that distinguish between numbers, arrays, and so on.
(Since the json type just wraps an actual JSON string, these work by looking at the first character of that string, which is definitive; this implies that the json type doesn't include leading or trailing whitespace.)
JSON defines a literal "null" value, but we define a separate "nulljson" distinguished signal value, testable by json_is_null, which indicates some kind of hard failure.
It is returned by all the json-returning functions that can fail, such as indexing an array or object, or parsing a JSON string.
It is simply the json type wrapping a nullspan (the span having .buf = .end = 0).
The function make_json is rarely used and is for "casting" a span to a json object in constant time.
It is normally only used internally in library methods, but can be used if you know you have a JSON string and don't want to parse it again.
The json indexing functions return the json type; that is, the contents are still valid JSON.
In particular, a JSON string will contain JSON string escaping.
If you want the actual string value, you can use json_un_s, which returns a new span in cmp space.
(This uses the lower-level json_s2s, which has a less convenient interface.)
(We should probably have a similar function for getting a number out, but it hasn't been added yet.)
*/
/* #json_design
- all the json constructor functions trim whitespace, so that all the predicate functions follow a pointer and examine one byte.
- the json parser and constant-time wrapper functions are the low-trust and high-trust ways to make a json from a string.
- if the json parser indicates that your span is valid json, that means that one of the json value-type predicates will return true for that json.
- the json value returned from the parser will match the input span except that any whitespace will have been trimmed.
*/
/* includes */
#define _GNU_SOURCE // for memmem
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/wait.h>
#include <ctype.h>
#include <limits.h>
#include <termios.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stddef.h>
/* Convenient debugging macros.
   Each prints `<expression text>: <value>` followed by a newline via prt() and
   immediately flush()es. The whole expansion and the argument are parenthesized
   so the macros compose safely inside larger expressions and argument lists.
   - dbgd: int as decimal   - dbgx: unsigned as hex   - dbgf: double
   - dbgp: pointer (cast to void*, as %p requires)
   - dbgs: span contents */
#define dbgd(x) (prt(#x ": %d\n", (x)), flush())
#define dbgx(x) (prt(#x ": %x\n", (x)), flush())
#define dbgf(x) (prt(#x ": %f\n", (x)), flush())
#define dbgp(x) (prt(#x ": %p\n", (void*)(x)), flush())
#define dbgs(x) (prt(#x ": %.*s\n", len(x), (x).buf), flush())
typedef unsigned char u8; // byte type; span and thran pointers are u8*
/* #span
Basic types and function declarations.
A span is two pointers.
Buf points to the first char included in the string.
End points to the first char excluded after the string's end.
These two pointers must point into some space that has been allocated somewhere.
If these two pointers are equal, the string is empty, but it still points to a location.
(So two empty spans are not necessarily the same span, while two empty strings are.)
Neither spans nor their contents are immutable; everything depends on intended use.
Spans frequently point into one of three large buffers, namely inp, out, and cmp.
The inp variable is the span which writes into input_space, and then is the immutable copy of stdin for the duration of the process.
This input may come from stdin or from the filesystem or network, etc.
The number of bytes of input is len(inp).
The output is stored in span out, which points to output_space.
Input processing is generally by reading or parsing inp or subspans of inp.
The output spans are mostly written to with prt() and other IO functions.
The cmp_space and cmp span which points to it are used for model data.
This includes reading and writing data that is synthesized during the program runtime.
These are just the common conventions; your program may use inp, out, and cmp differently.
When writing output, we often see prt followed by flush.
Flush sends to stdout the contents of out (the output span) that have not already been sent.
Usually it is important to do this
- before any operation that blocks, when the user should see the output that we've already written,
- generally immediately after prt when debugging anything,
- after printing any error message and before exiting the program, and
- at the end of main.
If you want to write to stderr, you can use flush_err(), which also flushes from the output_space but to stderr instead of stdout.
(You may need to do a flush() before the call to prt() if you already have pending output that needs to go to stdout.)
*/
// A span: a half-open byte range [buf, end) into memory allocated elsewhere.
typedef struct {
u8 *buf; // first byte included
u8 *end; // first byte excluded; equal to buf when the span is empty
} span;
// A thran: a span plus a progress pointer p, viewable as the two spans
// [buf, p) and [p, end) (see thran_a and thran_b).
typedef struct {
u8 *buf; // start of the whole range
u8 *end; // one past the end of the whole range
u8 *p; // progress pointer; thran_of() initialises it to buf
} thran;
#define BUF_SZ (1 << 30) // bytes requested for each buffer by init_spans(); also the limit used by the overflow checks
u8 *input_space; // remains immutable once stdin has been read up to EOF.
u8 *output_space; // backing memory that out starts in
u8 *cmp_space; // backing memory that cmp starts in
span out, inp, cmp; // the global output, input and cmp spans
span* outp; // the span the output functions (prt, w_char, terpri, ...) currently append to
// Forward declarations for the span I/O functions.
int empty(span);
int len(span);
void init_spans(); // main spanio init function
void init_spans_ioc(size_t,size_t,size_t); // same, with explicit byte sizes for the input, output and cmp buffers (in that order)
// basic spanio primitives
// out_sav: the previous output target, returned by out2cmp() and handed back to out_rst()
typedef struct {
span* outp;
} out_sav;
void prt(const char *, ...);
void w_char(char);
void wrs(span);
void bksp();
void sp();
void terpri();
void w_char_esc(char);
void w_char_esc_pad(char);
void w_char_esc_dq(char);
void w_char_esc_sq(char);
void wrs_esc(span);
out_sav out2cmp(); // redirect all output functions (prt, wrs, etc) to cmp instead of out
//out_sav out2atp(span); // redirect to append to a file (creating paths and files if needed)
void out_rst(out_sav); // undo effect of out2cmp or out2atp
void flush();
//void discard(); // experimental, probably going away
void flush_err();
void write_to_file(span content, const char* filename); // deprecated
int readable_file(span);
span read_file_into_span(char *filename, span buffer); // deprecated
span read_file_S_into_span(span filename, span buffer);
span read_file_into_cmp(span filename);
void advance1(span*);
void advance(span*,int);
int find_char(span s, char c); int find_char_rev(span s, char c);
int contains(span, span);
span take_n(int, span*);
span next_line(span*);
span first_n(span, int);
int span_eq(span, span);
int span_cmp(span, span);
span S(char*);
span nullspan();
int copy_file(const char *src, const char *dest); // TODO: maybe take spans instead
span inp_compl();
span cmp_compl();
span out_compl();
/* #spanio_basics
input statistics on raw bytes; span basics
This hand-written C code implements most of our span I/O basics.
If we can get an LLM to match this style it's a good result.
*/
int counts[256] = {0}; // per-byte-value tallies, incremented by read_and_count_stdin()
/* 1 when the span covers no bytes (its two pointers coincide), otherwise 0. */
int empty(span s) {
  return (s.buf == s.end) ? 1 : 0;
}
/* Number of bytes in the span: the distance from buf to end, narrowed to int. */
inline int len(span s) {
  return (int)(s.end - s.buf);
}
/* Wrap a span as a thran whose progress pointer starts at the span's buf. */
thran thran_of(span s) {
  thran t = { .buf = s.buf, .end = s.end, .p = s.buf };
  return t;
}
/* The "a" part of a thran: everything from buf up to the progress pointer. */
span thran_a(thran t) {
  span a = { .buf = t.buf, .end = t.p };
  return a;
}
/* The "b" part of a thran: everything from the progress pointer up to end. */
span thran_b(thran t) {
  span b = { .buf = t.p, .end = t.end };
  return b;
}
/* The whole range of a thran as a plain span; the progress pointer is dropped. */
span thran_full(thran t) {
  span whole = { .buf = t.buf, .end = t.end };
  return whole;
}
int out_WRITTEN = 0, cmp_WRITTEN = 0; // byte counts already accounted for by flush(), flush_err() or discard(), one counter per buffer
/* Initialise the global buffers and spans with the default size, BUF_SZ bytes each. */
void init_spans() {
  size_t each = BUF_SZ;
  init_spans_ioc(each, each, each);
}
/* Allocate the three global buffers and point the global spans at them.
   i, o, c: sizes in bytes of the input, output and cmp buffers respectively.
   Afterwards inp, out and cmp are empty spans at the start of their buffers
   and outp targets out.
   If an allocation fails we report on stderr and exit(1); prt() cannot be used
   here because the output span does not exist yet.
   Note that the overflow checks elsewhere in this file compare against BUF_SZ,
   not against the sizes passed here. */
void init_spans_ioc(size_t i, size_t o, size_t c) {
  input_space = malloc(i);
  output_space = malloc(o);
  cmp_space = malloc(c);
  // malloc(0) may legitimately return NULL, so only a failed non-zero request is fatal
  if ((i && !input_space) || (o && !output_space) || (c && !cmp_space)) {
    fprintf(stderr, "init_spans_ioc: out of memory\n");
    exit(1);
  }
  inp.buf = inp.end = input_space;
  out.buf = out.end = output_space;
  cmp.buf = cmp.end = cmp_space;
  outp = &out;
}
/* Backspace: drop the last byte of the current output span by moving its end back one. */
void bksp() {
  outp->end -= 1;
}
/* Append a single space character to the current output span. */
void sp() {
  char blank = ' ';
  w_char(blank);
}
/* Split the first n bytes off *io.
   Returns those n bytes as a span and advances io->buf past them.
   No bounds check is made against io->end. */
span head_n(int n, span *io) {
  u8 *start = io->buf;
  io->buf = start + n;
  return (span){ .buf = start, .end = start + n };
}
/* Byte-wise equality of two spans: 1 if they have the same length and the
   same contents, 0 otherwise. */
int span_eq(span s1, span s2) {
  int remaining = len(s1);
  if (remaining != len(s2)) return 0;
  u8 *a = s1.buf;
  u8 *b = s2.buf;
  while (remaining-- > 0) {
    if (*a++ != *b++) return 0;
  }
  return 1;
}
/* Three-way byte-wise comparison of two spans.
   Walks both spans in step; at the first differing byte returns
   (byte of s1) - (byte of s2). If one span runs out first, returns 1 when s1
   is the one exhausted and -1 when s2 is; returns 0 when both end together.
   NOTE(review): this orders a proper prefix AFTER the longer span, which is
   the opposite of strcmp()'s convention - confirm callers rely on this before
   changing it. */
int span_cmp(span s1, span s2) {
  u8 *a = s1.buf;
  u8 *b = s2.buf;
  while (a != s1.end && b != s2.end) {
    int dif = *a - *b;
    a++;
    b++;
    if (dif) return dif;
  }
  if (a == s1.end && b == s2.end) return 0;
  return (a == s1.end) ? 1 : -1;
}
span S(char *s) {
span ret = {(u8*)s, (u8*)s + strlen(s) };
return ret;
}
/* Copy span s into buf as a null-terminated C string.
   buf: destination of capacity n bytes (including the terminator).
   n:   capacity of buf; when n <= 0 nothing is written at all.
   s:   source span; at most n-1 bytes are copied (longer input is truncated).
   Returns buf for convenience. */
char* s_buffer(char* buf, int n, span s) {
  if (n <= 0) return buf; // no room even for the terminator
  int room = n - 1;
  int want = len(s);
  if (want < 0) want = 0; // malformed span (end before buf): treat as empty
  size_t l = (size_t)(want < room ? want : room);
  if (l) memmove(buf, s.buf, l); // skip the call for empty input, whose buf may be NULL
  buf[l] = '\0';
  return buf;
}
/* Return a null-terminated C string with the contents of the span.
   If the span's own last byte is already '\0', the span's buffer is returned
   directly. Otherwise the contents plus a terminator are appended to cmp
   (output is redirected there for the duration and then restored) and a
   pointer to that copy is returned. */
char* s(span s) {
  int n = len(s);
  if (n && s.end[-1] == '\0') {
    return (char*)s.buf;
  }
  char* copy = (char*)cmp.end; // where the copy is about to start
  out_sav saved = out2cmp();
  wrs(s);
  w_char('\0');
  out_rst(saved);
  return copy;
}
void read_and_count_stdin() {
int c;
while ((c = getchar()) != EOF) {
//if (c == ' ') continue;
assert(c != 0);
counts[c]++;
*inp.buf = c;
inp.buf++;
if (len(inp) == BUF_SZ) { prt("input overflow\n"); flush_err(); exit(1); }
}
inp.end = inp.buf;
inp.buf = input_space;
}
/*
span saved_out[16] = {0};
int saved_out_stack = 0;
void redir(span new_out) {
assert(saved_out_stack < 15);
saved_out[saved_out_stack++] = out;
out = new_out;
}
span reset() {
assert(saved_out_stack);
span ret = out;
out = saved_out[--saved_out_stack];
return ret;
}
*/
// Set to nonzero when debugging a crash: prt(), prs() and terpri() then call flush() after every write.
const int ALWAYS_FLUSH = 0;
// Note: this doesn't swap output_space, which means manual comparisons with output_space + BUF_SZ will be broken?
// probably an argument for the "thran"
// actually we should just be using out.buf + BUF_SZ anyway I suppose
//void swapcmp() { span swap = cmp; cmp = out; out = swap; int swpn = cmp_WRITTEN; cmp_WRITTEN = out_WRITTEN; out_WRITTEN = swpn; }
//void prt2cmp() { if (out.buf == output_space) swapcmp(); }
//void prt2std() { if (out.buf == cmp_space) swapcmp(); }
//span prt_cmp_stack[1024] = {0};
//int prt_cmp_stack_n = 0;
//void prt_cmp() { assert(prt_cmp_stack_n < 1023); prt_cmp_stack[prt_cmp_stack_n++] = out; out = cmp; }
//void prt_pop() { assert(0 < prt_cmp_stack_n); out = prt_cmp_stack[--prt_cmp_stack_n]; }
/* C convenience methods
We have a copy_file already here.
We add mkdir_p and pathpart just to simplify out2atp.
*/
/* #copy_file
The copy_file function copies the contents from one file to another.
It operates by opening the source file for reading and the destination file for writing.
The function reads chunks of data into a buffer and writes them out to the destination file, handling potential interruptions due to signals.
It also performs error checks at each step, including during file opening, reading, and writing.
If an error occurs, the function closes any open file descriptors and returns a negative error code corresponding to the step where the failure occurred.
*/
/* Copy the contents of file src to file dest (created or truncated, mode 0666
   before umask).
   Returns 0 on success, or a negative code naming the failed step:
     -1 cannot open src, -2 cannot open dest,
     -3 error writing (or closing) dest, -4 error reading src.
   Both read() and write() are retried when interrupted by a signal (EINTR);
   short writes are continued until the whole chunk is out. Both descriptors
   are closed on every path. */
int copy_file(const char *src, const char *dest) {
  char buffer[4096];
  int rc = 0;

  int source_fd = open(src, O_RDONLY);
  if (source_fd < 0) {
    return -1;
  }
  int dest_fd = open(dest, O_WRONLY | O_CREAT | O_TRUNC, 0666);
  if (dest_fd < 0) {
    close(source_fd);
    return -2;
  }

  while (rc == 0) {
    ssize_t n_read = read(source_fd, buffer, sizeof(buffer));
    if (n_read == 0) break; // EOF: done
    if (n_read < 0) {
      if (errno == EINTR) continue; // interrupted before any data: retry
      rc = -4;
      break;
    }
    char *out_ptr = buffer;
    ssize_t n_left = n_read;
    while (n_left > 0) {
      ssize_t n_written = write(dest_fd, out_ptr, n_left);
      if (n_written < 0 && errno == EINTR) continue; // retry only on a real interruption
      if (n_written <= 0) { // hard error, or no progress (errno may be stale, so do not loop)
        rc = -3;
        break;
      }
      n_left -= n_written;
      out_ptr += n_written;
    }
  }

  close(source_fd);
  // a failing close on the destination can be the first report of a write error
  if (close(dest_fd) != 0 && rc == 0) {
    rc = -3;
  }
  return rc;
}
/* #mkdir_p
void mkdir_p(span dir) {
// find the first occurrence, if any, of the char "/", which ends the leading path component of dir
// if there is no such occurrence, we are done, return
// use chdir(2) to change the cwd
// if this doesn't work because the directory does not exist, then create it (and then cd into it after all)
// continue to loop over the remaining part of the span after the "/"
// finally we return to the directory which we were originally in (which we must have saved earlier in `old_cwd` using getcwd and a PATH_MAX-sized buffer).
}
First we store the u8* cmp.end, so we don't leak cmp space.
On the last line of the function (or before any early return) we must remember to reset cmp.end = end.
@- probably we should have a very unsafe s() version that re-uses a single static buffer, since this is library stuff it's ok if it's hard to use
If any of our system calls fails, we will immediately print any filename argument (using prt, flush_err) and then print the OS error message using perror("mkdir_p") and finally exit(1).
*/
/* Create every directory named by the slash-terminated components of dir.
   Works by walking the path with chdir(2): for each component ending in "/",
   try to chdir into it, and if that fails, mkdir it (mode 0755) and chdir
   again. Text after the last "/" is ignored. Afterwards the original working
   directory is restored.
   A leading "/" (absolute path) starts the walk at the root, and an empty
   component in the middle ("a//b") is skipped; previously both produced an
   empty path name, which made chdir/mkdir fail and terminated the process.
   cmp.end is saved on entry and restored on the normal return path so no cmp
   space is leaked.
   On any system call failure we print the offending name (prt, flush_err),
   then perror("mkdir_p"), and exit(1). */
void mkdir_p(span dir) {
  u8* end = cmp.end;
  char old_cwd[PATH_MAX];
  if (getcwd(old_cwd, sizeof(old_cwd)) == NULL) {
    prt("Failed to get current working directory");
    flush_err();
    perror("mkdir_p");
    exit(1);
  }
  span remaining = dir;
  while (!empty(remaining)) {
    int idx = find_char(remaining, '/');
    if (idx == -1) break; // no further complete component
    span component = take_n(idx, &remaining);
    advance1(&remaining); // skip the "/"
    char path[PATH_MAX];
    if (empty(component)) {
      if (component.buf != dir.buf) continue; // doubled slash: nothing to enter
      path[0] = '/'; // leading slash: absolute path, start from the root
      path[1] = '\0';
    } else {
      s_buffer(path, PATH_MAX, component);
    }
    if (chdir(path) != 0) {
      if (mkdir(path, 0755) != 0 || chdir(path) != 0) {
        prt("%.*s", len(component), component.buf);
        flush_err();
        perror("mkdir_p");
        exit(1);
      }
    }
  }
  if (chdir(old_cwd) != 0) {
    prt("Failed to return to directory: %s", old_cwd);
    flush_err();
    perror("mkdir_p");
    exit(1);
  }
  cmp.end = end;
}
/* #pathpart
This is just a convenience method getting the longest known-path component of a span.
This is simply the prefix of dir that ends with the last slash it contains.
span pathpart(span dir) {
// find the last slash in dir
// if none, return the empty span located at dir.buf
// return a span starting from dir.buf and ending with the slash offset plus one (so that it is included)
}
(Note that this function always returns a subspan of dir, and only a null span if dir is the null span.)
*/
/* Directory prefix of a path: the part of dir up to and including its last
   "/". When dir contains no slash the result is the empty span at dir.buf.
   The result is always a subspan of dir. */
span pathpart(span dir) {
  span ret = { .buf = dir.buf, .end = dir.buf };
  int slash = find_char_rev(dir, '/');
  if (slash != -1) {
    ret.end += slash + 1; // keep the slash itself
  }
  return ret;
}
/* spanio basics
*/
out_sav out2cmp() { out_sav ret = {0}; ret.outp = outp; outp = &cmp; return ret; }
//out_sav out2atp(span p) { out_sav ret = {0}; ret.outp = outp; outp = &cmp; mkdir_p(pathpart(p)); char buffer[4096]; s_buffer(buffer, 4096, pathpart(p)); ret.fcls = open(buffer, O_WRONLY | O_CREAT | O_APPEND); return ret; }
//void out_rst(out_sav sav) { outp = sav.outp; /*if (sav.fcls) close(sav.fcls);*/ if (sav.prev_target) flush_target = sav.prev_target; }
/*
out_sav out2atp(span p) {
out_sav ret = {0};
ret.outp = outp;
outp = &cmp;
//fprintf(stderr,"before: %p\n",cmp.end);
mkdir_p(pathpart(p));
//fprintf(stderr,"after: %p\n",cmp.end);
char buffer[4096];
s_buffer(buffer, 4096, p);
int fd = open(buffer, O_WRONLY | O_CREAT | O_APPEND, 0644);
if (fd < 0) {
perror("out2atp");
exit(1);
}
ret.prev_target = flush_target;
flush_target = fdopen(fd, "a");
if (!flush_target) {
perror("out2atp");
exit(1);
}
return ret;
}
*/
/* Restore the output target recorded in sav (as returned by out2cmp()).
   Only the outp pointer is restored; the flush and flush_target handling that
   was once sketched here is disabled. */
void out_rst(out_sav sav) {
  span *previous = sav.outp;
  outp = previous;
}
/* printf-style append to the current output span (*outp).
   fmt, ...: format string and arguments, exactly as for printf.
   The text is formatted into a temporary heap buffer with vasprintf (so that
   no trailing null byte lands in the span) and then copied to outp->end.
   The capacity check now happens BEFORE the copy, so an oversized write is
   reported instead of first running past the buffer; the limit is BUF_SZ bytes
   from outp->buf. A vasprintf failure (negative return) is also reported
   rather than being used as a copy length.
   Both failures terminate the process with exit(7); these reports use stdio
   directly because the output span itself is what failed.
   When ALWAYS_FLUSH is set, flush() is called after every write. */
void prt(const char * fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  char *buffer = NULL;
  // we used to use vsprintf here, but that adds a null byte that we don't want
  int n = vasprintf(&buffer, fmt, ap);
  va_end(ap);
  if (n < 0) {
    fprintf(stderr, "prt: formatting failed\n");
    exit(7);
  }
  long total = (long)(outp->end - outp->buf) + n;
  if (BUF_SZ < total) {
    printf("OUTPUT OVERFLOW (%ld)\n", total);
    exit(7);
  }
  memcpy(outp->end, buffer, n);
  free(buffer);
  outp->end += n;
  if (ALWAYS_FLUSH) flush();
}
/* Like prt(), but always appends to cmp and returns the newly written text.
   fmt, ...: format string and arguments, exactly as for printf.
   Returns a span covering the bytes just appended to cmp (no null terminator).
   If the result would take cmp past BUF_SZ bytes, or if vasprintf fails
   (negative return, previously used unchecked as a copy length), the process
   reports the problem and terminates with exit(7).
   When ALWAYS_FLUSH is set, flush() is called afterwards. */
span prs(char * fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  span ret = { .buf = cmp.end };
  char *buffer = NULL;
  // we used to use vsprintf here, but that adds a null byte that we don't want
  int n = vasprintf(&buffer, fmt, ap);
  va_end(ap);
  if (n < 0) {
    fprintf(stderr, "prs: formatting failed\n");
    exit(7);
  }
  if (BUF_SZ < len(cmp) + n) {
    printf("CMP OVERFLOW (%d)\n", len(cmp) + n);
    exit(7);
  }
  memcpy(cmp.end, buffer, n);
  free(buffer);
  cmp.end += n;
  if (ALWAYS_FLUSH) flush();
  ret.end = cmp.end;
  return ret;
}
/* Append a newline to the current output span; flush() too when ALWAYS_FLUSH is set. */
void terpri() {
  *outp->end++ = '\n';
  if (ALWAYS_FLUSH) {
    flush();
  }
}
/* Append one byte to the current output span. No capacity check is made. */
void w_char(char c) {
  *outp->end = c;
  outp->end += 1;
}
/* Append c to the current output span, escaping control characters.
   Bytes below 0x20 and DEL (127) are written as a backslash followed by three
   octal digits; everything else is written unchanged.
   NOTE(review): where plain char is signed, bytes >= 0x80 also compare below
   0x20 and are therefore escaped too - confirm that is intended. */
void w_char_esc(char c) {
  int plain = !(c < 0x20 || c == 127);
  if (plain) {
    *outp->end = c;
    outp->end += 1;
    return;
  }
  int wrote = sprintf((char*)outp->end, "\\%03o", (u8)c);
  outp->end += wrote;
}
/* Like w_char_esc(), but fixed-width: an unescaped character is preceded by
   three spaces so that it occupies four columns, the same as a backslash plus
   three octal digits. */
void w_char_esc_pad(char c) {
  if (c >= 0x20 && c != 127) {
    for (int pad = 0; pad < 3; pad++) sp();
    *outp->end = c;
    outp->end += 1;
  } else {
    int wrote = sprintf((char*)outp->end, "\\%03o", (u8)c);
    outp->end += wrote;
  }
}
/* Append c escaped for use inside a double-quoted string.
   Bytes below 0x20 and DEL (127) become a backslash plus three octal digits;
   a double quote or a backslash is preceded by a backslash; anything else is
   written unchanged. */
void w_char_esc_dq(char c) {
  if (c < 0x20 || c == 127) {
    int wrote = sprintf((char*)outp->end, "\\%03o", (u8)c);
    outp->end += wrote;
    return;
  }
  if (c == '"' || c == '\\') {
    *outp->end++ = '\\'; // escape prefix; the character itself follows
  }
  *outp->end++ = c;
}
/* Append c escaped for use inside a single-quoted string.
   Bytes below 0x20 and DEL (127) become a backslash plus three octal digits;
   a single quote or a backslash is preceded by a backslash; anything else is
   written unchanged. */
void w_char_esc_sq(char c) {
  if (c < 0x20 || c == 127) {
    int wrote = sprintf((char*)outp->end, "\\%03o", (u8)c);
    outp->end += wrote;
    return;
  }
  if (c == '\'' || c == '\\') {
    *outp->end++ = '\\'; // escape prefix; the character itself follows
  }
  *outp->end++ = c;
}
/* Append the bytes of span s, in order, to the current output span. */
void wrs(span s) {
  u8 *p = s.buf;
  while (p < s.end) {
    w_char(*p);
    p++;
  }
}
/* Append the bytes of span s to the current output span, passing each one
   through w_char_esc(). */
void wrs_esc(span s) {
  u8 *p = s.buf;
  while (p < s.end) {
    w_char_esc(*p);
    p++;
  }
}
void flush() {
int *WRITTEN = (output_space < outp->end && outp->end < output_space + BUF_SZ) ? &out_WRITTEN : &cmp_WRITTEN;
if (*WRITTEN < len(*outp)) {
//fprintf(flush_target,"%.*s", len(*outp) - *WRITTEN, outp->buf + *WRITTEN);
fwrite(outp->buf + *WRITTEN, 1, len(*outp) - *WRITTEN, stdout);
*WRITTEN = len(*outp);
fflush(stdout);
}
}
void discard() {
int *WRITTEN = (output_space < outp->end && outp->end < output_space + BUF_SZ) ? &out_WRITTEN : &cmp_WRITTEN;
*WRITTEN = len(*outp);
}
/*
flush_err is the stderr counterpart of flush: it writes the not-yet-written tail of the current output span to stderr and flushes stderr.
The "already written" counter is out_WRITTEN when outp->end lies strictly inside output_space, cmp_WRITTEN otherwise.
We use fwrite, as flush does, because a "%.*s" print stops at the first NUL byte and would drop the rest of the pending output.
*/
void flush_err() {
  int *WRITTEN = (output_space < outp->end && outp->end < output_space + BUF_SZ) ? &out_WRITTEN : &cmp_WRITTEN;
  if (*WRITTEN < len(*outp)) {
    fwrite(outp->buf + *WRITTEN, 1, len(*outp) - *WRITTEN, stderr);
    *WRITTEN = len(*outp);
    fflush(stderr);
  }
}
/*
In write_to_file we open a file, which must not already exist, write the contents of a span into it, and close it.
If the file exists or there is any other error, we prt(), flush(), and exit as per usual.
write_to_file_2 takes an additional clobber flag; when it is nonzero, an existing file is truncated and overwritten instead of being treated as an error.
write_to_file_span takes the same arguments as write_to_file_2, except that the filename is a span.
It builds a null-terminated string and calls write_to_file_2.
*/
void write_to_file_2(span content, const char* filename, int clobber);
/* write_to_file: the non-clobbering form -- forwards to write_to_file_2 with clobber set to 0. */
void write_to_file(span content, const char* filename) {
  const int no_clobber = 0;
  write_to_file_2(content, filename, no_clobber);
}
/*
write_to_file_2 writes the bytes of content into the file named filename (created with mode 0644).
With clobber == 0 the file must not already exist (O_EXCL); with clobber != 0 an existing file is truncated and overwritten.
On any failure we prt() a message, flush(), and exit(EXIT_FAILURE).
A write() that transfers fewer bytes than requested is not an error, so we keep writing until the whole span is on disk.
*/
void write_to_file_2(span content, const char* filename, int clobber) {
  int flags = O_WRONLY | O_CREAT | O_TRUNC;
  // O_EXCL makes open fail if the file is already there, which is what the non-clobber mode wants
  if (!clobber) flags |= O_EXCL;
  int fd = open(filename, flags, 0644);
  if (fd == -1) {
    if (clobber) {
      prt("Error opening %s for writing: File cannot be created or opened.\n", filename);
    } else {
      prt("Error opening %s for writing: File already exists or cannot be created.\n", filename);
    }
    flush();
    exit(EXIT_FAILURE);
  }
  // Write the content of the span, resuming after short writes
  const u8 *cursor = content.buf;
  ssize_t remaining = len(content);
  while (remaining > 0) {
    ssize_t written = write(fd, cursor, (size_t)remaining);
    // -1 is a real error; 0 would mean no progress, so treat it as failure rather than spin forever
    if (written <= 0) {
      prt("Error writing to file %s.\n", filename);
      flush();
      close(fd); // Attempt to close the file before exiting
      exit(EXIT_FAILURE);
    }
    cursor += written;
    remaining -= written;
  }
  // Close the file
  if (close(fd) == -1) {
    prt("Error closing %s after writing.\n", filename);
    flush();
    exit(EXIT_FAILURE);
  }
}
/*
write_to_file_span is write_to_file_2 with the filename given as a span.
The span is copied into a stack buffer sized to fit, null-terminated, and passed on together with content and clobber.
*/
void write_to_file_span(span content, span filename_span, int clobber) {
  size_t name_len = filename_span.end - filename_span.buf;
  char filename[name_len + 1];
  memcpy(filename, filename_span.buf, name_len);
  filename[name_len] = '\0';
  write_to_file_2(content, filename, clobber);
}
/* not really any better for usability I think
span read_f_into_span(span filename, span* buffer) {
span ret = read_file_S_into_span(filename, *buffer);
buffer->buf = ret.end;
}
*/
/* #readable_file @s_buffer
int readable_file(span path);
We use s_buffer pattern with PATH_MAX and do a stat.
If the file doesn't exist, isn't a normal file, or isn't readable by us we return 0, otherwise 1.
*/
int readable_file(span path) {
  char buffer[PATH_MAX];
  struct stat sb;
  s_buffer(buffer, PATH_MAX, path);
  // three conditions, checked in order with short-circuiting:
  // the path can be stat'ed, it is a regular file, and access() grants us read permission
  return stat(buffer, &sb) == 0
      && S_ISREG(sb.st_mode)
      && access(buffer, R_OK) == 0;
}
/*
read_file_into_cmp reads the named file into the span returned by cmp_compl(), then moves cmp.end to the end of what was read.
It returns the span holding the file contents.
*/
span read_file_into_cmp(span filename) {
  span loaded = read_file_S_into_span(filename, cmp_compl());
  // claim the freshly read bytes as part of cmp
  cmp.end = loaded.end;
  return loaded;
}
/*
read_file_S_into_span is read_file_into_span with the filename given as a span.
The name is first converted with s_buffer into a 2048-byte stack buffer.
*/
span read_file_S_into_span(span filename, span buffer) {
  enum { PATH_BUF_SZ = 2048 };
  char path[PATH_BUF_SZ];
  s_buffer(path, PATH_BUF_SZ, filename);
  return read_file_into_span(path, buffer);
}
/*
read_file_into_span reads the whole file named filename into the start of buffer.
It returns a span beginning at buffer.buf and covering exactly the bytes that were read.
If the file cannot be opened, stat'ed, read or closed, or if its size exceeds len(buffer), we prt() a message, flush_err(), and exit(1).
A single read() may return fewer bytes than requested, so we keep reading until we have the size reported by fstat or hit end of file.
*/
span read_file_into_span(char* filename, span buffer) {
  // Open the file
  int fd = open(filename, O_RDONLY);
  if (fd == -1) {
    prt("Failed to open %s\n", filename);
    flush_err();
    exit(1);
  }
  // Get the file size
  struct stat statbuf;
  if (fstat(fd, &statbuf) == -1) {
    close(fd);
    prt("Failed to get file size for %s\n", filename);
    flush_err();exit(1);
  }
  // Check if the file's size fits into the provided buffer
  size_t file_size = statbuf.st_size;
  if (file_size > len(buffer)) {
    close(fd);
    prt("File content for %s does not fit into the provided buffer\n", filename);
    flush_err();exit(1);
  }
  // Read file contents into the buffer, resuming after short reads
  size_t total = 0;
  while (total < file_size) {
    ssize_t got = read(fd, buffer.buf + total, file_size - total);
    if (got == -1) {
      close(fd);
      prt("Failed to read file contents for %s\n", filename);
      flush_err();exit(1);
    }
    // end of file before the stat'ed size; return what we have
    if (got == 0) break;
    total += (size_t)got;
  }
  // Close the file
  if (close(fd) == -1) {
    prt("Failed to close file %s\n", filename);
    flush_err();exit(1);
  }
  // Create and return a new span that reflects the read content
  span new_span = {buffer.buf, buffer.buf + total};
  return new_span;
}
/*
u8 *save_stack[16] = {0};
int save_count = 0;
void save() {
push(out);
}
span pop_into_span() {
span ret;
ret.buf = save_stack[--save_count];
ret.end = out.end;
return ret;
}
void push(span s) {
save_stack[save_count++] = s.buf;
}
void pop(span *s) {
s->buf = save_stack[--save_count];
}
*/
/*
take_n is a mutating function which takes the first n chars of the span into a new span, and also modifies the input span to remove this same prefix.
After a function call such as `span new = take_n(n, &s)`, it will be the case that `new` concatenated with `s` is equivalent to `s` before the call.
*/
/* take_n: split off the first n bytes of *io as the returned span and advance io->buf past them; n is not checked against the span's length. */
span take_n(int n, span *io) {
  span prefix = { .buf = io->buf };
  io->buf += n;
  // the prefix ends exactly where the remainder now begins
  prefix.end = io->buf;
  return prefix;
}
/* advance1: drop the first byte of *s; an empty span is left unchanged. */
void advance1(span *s) {
  if (empty(*s)) return;
  s->buf++;
}
/* advance: drop the first n bytes of *s; if n exceeds the span's length the span becomes empty at its end. */
void advance(span *s, int n) {
  s->buf = (n <= len(*s)) ? s->buf + n : s->end;
}
/* shorten1: drop the last byte of *s; an empty span is left unchanged. */
void shorten1(span *s) {
  if (empty(*s)) return;
  s->end--;
}
/* shorten: drop the last n bytes of *s; if n exceeds the span's length the span becomes empty at its start. */
void shorten(span *s, int n) {
  if (len(*s) < n) {
    s->end = s->buf;
    return;
  }
  s->end -= n;
}
/*
contains returns 1 if the bytes of needle occur somewhere inside haystack, and 0 otherwise.
The search itself is delegated to memmem.
*/
int contains(span haystack, span needle) {
  // a needle longer than the haystack can never match
  if (len(needle) > len(haystack)) return 0;
  size_t hay_len = haystack.end - haystack.buf;
  size_t needle_len = needle.end - needle.buf;
  return memmem(haystack.buf, hay_len, needle.buf, needle_len) != NULL;
}
/* contains_ptr: 1 if span b lies entirely within the address range of span a (pointer comparison, not content), else 0. */
int contains_ptr(span a, span b) {
  if (b.buf < a.buf) return 0;
  return b.end <= a.end;
}