forked from jessek/hashdeep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.h
976 lines (838 loc) · 32.6 KB
/
main.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
/*
* main.h:
*
* This is the main file included by all other modules in md5deep/hashdeep/etc.
*
* It includes:
* common.h - the common system include files
* xml.h - the C++ XML system.
* hash function headers
*
* C++ STL stuff.
*
* It then creates all the C++ classes and structures used.
*
* $Id$
*/
#ifndef __MAIN_H
#define __MAIN_H
#include "common.h"
#include "xml.h"
#ifdef HAVE_PTHREAD
#include "threadpool.h"
#endif
#include <map>
#include <vector>
#if !defined(VERSION) && defined(PACKAGE_VERSION)
#define VERSION PACKAGE_VERSION
#endif
#define VERBOSE 1
#define MORE_VERBOSE 2
#define INSANELY_VERBOSE 3
/* These describe the version of the file format being used, not
* the version of the program.
*/
#define HASHDEEP_PREFIX "%%%% "
#define HASHDEEP_HEADER_10 "%%%% HASHDEEP-1.0"
/* HOW TO ADD A NEW HASHING ALGORITHM
* Add a value for the algorithm to the hashid_t enumeration
(before alg_unknown, which must remain last)
* Add a matching case to the operator<< for hashid_t below
* Add the functions to compute the hashes. There should be three functions,
an initialization routine, an update routine, and a finalize routine.
The convention, for an algorithm "foo", is
foo_init, foo_update, and foo_final.
* Add your new code to Makefile.am under hashdeep_SOURCES
* Add a call to insert the algorithm in algorithm_t::load_hashing_algorithms
* See if you need to increase MAX_ALGORITHM_NAME_LENGTH or
MAX_ALGORITHM_CONTEXT_SIZE for your algorithm in common.h
* Update the usage function and man page to include the function
*/
/**
 * Identifier for each hash algorithm known to the program.
 *
 * The enumerators are consecutive and start at zero; NUM_ALGORITHMS is
 * defined as alg_unknown, so every real algorithm id is a valid index
 * into an array of NUM_ALGORITHMS elements.
 */
typedef enum {
    alg_md5 = 0,
    alg_sha1,
    alg_sha256,
    alg_tiger,
    alg_whirlpool,
    alg_sha3,
    alg_xxhash,

    // Sentinel. alg_unknown must always be the final enumerator:
    // many functions use it as their loop terminator.
    alg_unknown
} hashid_t;
/**
 * Write the symbolic name of a hashid_t (for example "alg_md5") to a stream.
 *
 * @param os  stream to write to
 * @param h   algorithm identifier to print
 * @return    os, to allow chaining
 *
 * A value that matches none of the enumerators writes nothing.
 * The switch deliberately has no default label so that the compiler can
 * warn when a new enumerator is added without a case here.
 */
inline std::ostream & operator << (std::ostream &os,const hashid_t &h)
{
    const char *label = 0;
    switch (h) {
    case alg_md5:       label = "alg_md5";       break;
    case alg_sha1:      label = "alg_sha1";      break;
    case alg_sha256:    label = "alg_sha256";    break;
    case alg_tiger:     label = "alg_tiger";     break;
    case alg_whirlpool: label = "alg_whirlpool"; break;
    case alg_sha3:      label = "alg_sha3";      break;
    case alg_xxhash:    label = "alg_xxhash";    break;
    case alg_unknown:   label = "alg_unknown";   break;
    }
    if (label) {
        os << label;
    }
    return os;
}
#define NUM_ALGORITHMS alg_unknown
/* Which ones are enabled by default */
#define DEFAULT_ENABLE_MD5 TRUE
#define DEFAULT_ENABLE_SHA1 FALSE
#define DEFAULT_ENABLE_SHA256 TRUE
#define DEFAULT_ENABLE_TIGER FALSE
#define DEFAULT_ENABLE_WHIRLPOOL FALSE
#define DEFAULT_ENABLE_SHA3 FALSE
#define DEFAULT_ENABLE_XXHASH FALSE
/**
 * iomode names the strategies that can be used to read a file's contents.
 * The constants are plain ints so that they can be stored in an int option.
 */
class iomode {
public:
    static const int buffered   = 0;    // use fopen, fread, fclose
    static const int unbuffered = 1;    // use open, read, close
    static const int mmapped    = 2;    // use open, mmap, close

    /**
     * Convert a user-supplied string into one of the iomode constants.
     *
     * @param str  "0", "1" or "2", or any string whose first character is
     *             'b' (buffered), 'u' (unbuffered) or 'm' (mmapped).
     * @return     the matching iomode constant.
     *
     * Any other string is reported on std::cerr and trips assert(0).
     * When assertions are compiled out (NDEBUG) the function falls back
     * to iomode::unbuffered.
     */
    static int toiomode(const std::string &str){
        // An empty string has no first character to inspect.
        const char first = str.empty() ? '\0' : str[0];

        if(str=="0" || first=='b') return iomode::buffered;
        if(str=="1" || first=='u') return iomode::unbuffered;
        if(str=="2" || first=='m') return iomode::mmapped;

        // Terminate the line so the message is not glued to the
        // assertion failure text (or to whatever is printed next).
        std::cerr << "Invalid iomode '" << str << "'" << '\n';
        assert(0);
        return iomode::unbuffered;      // fallback when assert() is disabled
    }
};
/* This class holds the information known about each hash algorithm.
* It's sort of like the EVP system in OpenSSL.
*
* In version 3 the list of known hashes was stored here as well.
* That has been moved to the hashlist database (further down).
*
* Right now we are using some global variables; the better way to do this
* would be with a C++ singleton.
*
* Perhaps the correct way to do this would be a global C++ vector of objects?
*/
/**
 * Describes one hash algorithm: its identity, digest width, and the three
 * entry points (init / update / finalize) that operate on an opaque context.
 * The static members manage the table of algorithms as a whole.
 */
class algorithm_t {
public:
    bool        inuse;          // true if we are using this algorithm
    std::string name;           // name of algorithm
    size_t      bit_length;     // 128 for MD5
    hashid_t    id;             // usually the position in the array...

    /* The hashing functions. Each receives the algorithm's context as void*. */
    void (*f_init)(void *ctx);
    void (*f_update)(void *ctx, const unsigned char *buf, size_t len);
    // second argument is unnamed; presumably the output digest buffer -- confirm in hash code
    void (*f_finalize)(void *ctx, unsigned char *);

    /* The methods */
    static void add_algorithm(hashid_t pos, const char *name, uint16_t bits,
                              void (*func_init)(void *ctx),
                              void (*func_update)(void *ctx, const unsigned char *buf, size_t len),
                              void (*func_finalize)(void *ctx, unsigned char *),
                              int inuse);
    static void     load_hashing_algorithms();
    static void     clear_algorithms_inuse();

    // enable the algorithms in 'var'; var can be 'all'
    static void     enable_hashing_algorithms(std::string var);

    // return the hashid_t for 'name'
    static hashid_t get_hashid_for_name(std::string name);

    // returns true if buf contains only hex characters
    static bool     valid_hex(const std::string &buf);

    // returns true if buf is a valid hash for algorithm 'alg'
    static bool     valid_hash(hashid_t alg,const std::string &buf);

    // returns count of algorithms in use
    static int      algorithms_in_use_count();
};
extern algorithm_t hashes[NUM_ALGORITHMS]; // which hash algorithms are available and in use
/**
 * status_t describes exit codes for the program.
 *
 * It wraps a single int32_t. Two families of constants are provided:
 * the status_* values, and the STATUS_* bit flags which are meant to be
 * or'ed together with add().
 *
 * The read-only members (get_status, operator==, operator!=) are
 * const-qualified so they can be used on const objects and references.
 */
class status_t {
private:
    int32_t code;                       // current status value; starts at 0

public:
    status_t():code(0){}

    static const int32_t status_ok = EXIT_SUCCESS; // 0
    static const int32_t status_EXIT_FAILURE = EXIT_FAILURE;
    static const int32_t status_out_of_memory = -2;
    static const int32_t status_invalid_hash = -3;
    static const int32_t status_unknown_error = -4;
    static const int32_t status_omg_ponies = -5;

    /*
     * Return values for the program
     * RBF - Document these return values for hashdeep
     * A successful run has these or'ed together
     */
    static const int32_t STATUS_UNUSED_HASHES = 1;
    static const int32_t STATUS_INPUT_DID_NOT_MATCH = 2;
    static const int32_t STATUS_USER_ERROR = 64;
    static const int32_t STATUS_INTERNAL_ERROR = 128;

    /** Or 'val' into the current status (accumulates flag bits). */
    void add(int32_t val){ code |= val; }

    /** Replace the current status with 'val'. */
    void set(int32_t val){ code = val; }

    /** @return the current status value. */
    int32_t get_status() const { return code; }

    /** Compare the current status value against a raw code. */
    bool operator==(int32_t v) const { return code==v; }
    bool operator!=(int32_t v) const { return code!=v; }
};
#ifdef _WIN32
typedef __time64_t timestamp_t;
typedef std::wstring filename_t;
#else
typedef time_t timestamp_t;
typedef std::string filename_t;
#endif
/**
* file_metadata_t contains metadata information about a file.
* It also includes a stat call that returns the inode information
* and link count even on windows, where the API is different than stat.
* Note that we only include information we care about in this program
*
* this is in dig.cpp.
*/
/*
 * File types, as classified by this program.
 * (Strangely, we define our own file types.)
 * The first eight values are consecutive from zero; stat_unknown is
 * pinned to 254.
 */
typedef enum {
    stat_regular = 0,
    stat_directory,
    stat_door,
    stat_block,
    stat_character,
    stat_pipe,
    stat_socket,
    stat_symlink,

    stat_unknown = 254
} file_types;
/**
 * Metadata about one file: its identity (device + inode), link count,
 * size and the three timestamps. Only the fields this program needs
 * are kept.
 */
class file_metadata_t {
public:
    /** Classify a stat buffer as one of the file_types values. */
    static file_types decode_file_type(const struct __stat64 &sb);

    /**
     * stat a file, print an error and return -1 if it fails,
     * otherwise return 0.
     *
     * @param path        file to examine
     * @param m           receives the metadata
     * @param ocb         display object used for error reporting
     * @param is_symlink  defaults to false
     */
    static int stat(const filename_t &path,file_metadata_t *m,class display &ocb, bool is_symlink = false);

    /** Uniquely defines a file on this system. */
    class fileid_t {
    public:
        fileid_t():dev(0),ino(0){}
        fileid_t(uint64_t dev_,uint64_t ino_):dev(dev_),ino(ino_){}

        uint64_t dev;           // device number
        uint64_t ino;           // inode number
    };

    /** Default: every field zero. */
    file_metadata_t():
        fileid(),
        nlink(0),
        size(0),
        ctime(0),
        mtime(0),
        atime(0){}

    /** Member-wise construction from explicit values. */
    file_metadata_t(fileid_t fileid_,uint64_t nlink_,uint64_t size_,timestamp_t ctime_,timestamp_t mtime_,
                    timestamp_t atime_):
        fileid(fileid_),
        nlink(nlink_),
        size(size_),
        ctime(ctime_),
        mtime(mtime_),
        atime(atime_){}

    fileid_t    fileid;
    uint64_t    nlink;
    uint64_t    size;
    timestamp_t ctime;
    timestamp_t mtime;
    timestamp_t atime;
};
/** file_data_t contains information about a file.
* It can be created by hashing an actual file, or by reading a file of hashes.
* The object is simple so that the built in C++ shallow copy will make a proper copy of it.
* Note that all hashes are currently stored as a hex string. That incurs a 2x memory overhead.
* This will be changed.
*/
class file_data_t {
public:
    /** Starts with zero bytes read and no match recorded. */
    file_data_t():
        file_bytes(0),
        matched_file_number(0){}

    // Virtual because this class is subclassed (see file_data_hasher_t).
    virtual ~file_data_t(){}

    // One slot per algorithm, indexed by hashid_t.
    std::string hash_hex[NUM_ALGORITHMS];       // the hash in hex of the entire file
    std::string hash512_hex[NUM_ALGORITHMS];    // hash of the first 512 bytes, for triage mode

    std::string file_name;                      // just the file_name; native on POSIX; UTF-8 on Windows.
    uint64_t    file_bytes;                     // how many bytes were actually read
    uint64_t    matched_file_number;            // file number that we matched.; 0 if no match
};
/**
* hash_context_obj stores information for a specific hash,
* which may be for a piece of a file or for an entire file.
*/
class hash_context_obj {
public:
    /** Zeroes the segment bookkeeping; the context buffers are not touched here. */
    hash_context_obj():
        read_offset(0),
        read_len(0){}

    /* Information for the hashing underway: one context buffer per algorithm */
    uint8_t hash_context[NUM_ALGORITHMS][MAX_ALGORITHM_CONTEXT_SIZE];

    /* The actual hashing */
    void multihash_initialize();
    void multihash_update(const unsigned char *buffer,size_t bufsize);
    void multihash_finalize(std::string dest[]);

    // for piecewise hashing: where this segment was actually read
    uint64_t read_offset;       // where the segment we read started
    uint64_t read_len;          // how many bytes were read and hashed
};
/** file_data_hasher_t is a subclass of file_data_t.
 * It contains additional information necessary to actually hash a file:
 * the name of the file to open, the handle/descriptor/mapping used to
 * read it, timestamps and size from stat, and progress bookkeeping.
 *
 * The destructor releases whichever of the FILE handle, memory mapping
 * and file descriptor are still held.
 */
class file_data_hasher_t : public file_data_t {
private:
    static uint64_t next_file_number;   // source of file_number values
    static mutex_t  fdh_lock;

public:
    /** @return the size reported by stat, in whole megabytes. */
    uint64_t stat_megs() const {
        return stat_bytes / ONE_MEGABYTE;
    }

    static const size_t MD5DEEP_IDEAL_BLOCK_SIZE = 131072;

    /**
     * @param ocb_  where we put results
     *
     * Every new object takes the next file number.
     * NOTE(review): the increment of next_file_number is not guarded by
     * fdh_lock here -- confirm objects are only constructed from one thread.
     */
    file_data_hasher_t(class display *ocb_):
        file_is_symlink(false),
        ocb(ocb_),                      // where we put results
        handle(0),
        fd(-1),                         // -1 means "no descriptor open"
        base(0),bounds(0),              // for mmap
        file_number(0),ctime(0),mtime(0),atime(0),stat_bytes(0),
        start_time(0),last_time(0),eof(false),workerid(-1){
        file_number = ++next_file_number;
    }

    virtual ~file_data_hasher_t(){
        if(handle){
            fclose(handle);
            handle = 0;
        }
#ifdef HAVE_MMAP
        // Drop the mapping before the descriptor it was created from.
        if(base){
            munmap((void *)base,bounds);
            base   = 0;
            bounds = 0;
        }
#endif
        // The "not open" sentinel is -1 (see the constructor), and 0 is a
        // valid descriptor, so test for >= 0 rather than for non-zero.
        if(fd>=0){
            close(fd);
            fd = -1;
        }
    }

    bool is_stdin(){ return handle==stdin; }

    /* The actual file to hash */
    filename_t file_name_to_hash;
    bool       file_is_symlink;

    /* Where the results go */
    class display *ocb;

    /* How we read the data */
    FILE                *handle;        // the file we are reading
    int                  fd;            // fd used for unbuffered and mmap; -1 when not open
    const unsigned char *base;          // base of mapped file
    size_t               bounds;        // size of the mapped file

    std::string       triage_info;      // original note: "if true, must print on output"
    std::stringstream dfxml_hash;       // the DFXML hash digest for the piece just hashed;
                                        // used to build piecewise
    uint64_t          file_number;
    void append_dfxml_for_byterun();
    void compute_dfxml(bool known_hash,const hash_context_obj *hc);

    timestamp_t ctime;                  // ctime; previously 'timestamp'
    timestamp_t mtime;
    timestamp_t atime;

    // How many bytes (and megs) we think are in the file, via stat(2)
    // and how many bytes we've actually read in the file
    uint64_t stat_bytes;                // how much stat returned

    /* When we started the hashing, and when was the last time a display was printed,
     * for printing status updates.
     */
    time_t start_time, last_time;       // of hashing
    bool   eof;                         // end of file encountered while reading
    int    workerid;                    // my worker id, or -1 if there is none
    void set_workerid(int id){workerid=id;}

    /* multithreaded hash implementation is these functions in hash.cpp.
     * hash() is called to hash each file and record the results.
     * Return codes are both stored in display return_code and returned
     * 0 - for success, -1 for error
     */
    void dfxml_timeout(const std::string &tag,const timestamp_t &val);
    void dfxml_write_hashes(std::string hex_hashes[],int indent);

    // called to actually do the computation; returns true if successful
    // and fills in the read_offset and read_len
    bool compute_hash(uint64_t request_start,uint64_t request_len,hash_context_obj *segment,hash_context_obj *file);

    void hash();                        // called to hash each file and record results
};
/**
 * A hashlist is a vector of file_data_t pointers plus, for every
 * algorithm, a multimap from the hex hash string to the same pointers.
 *
 * state->known is used to hold the audit file that is loaded.
 * state->seen is used to hold the hashes seen on the current run.
 *
 * The implementation lives in hashlist.cpp. It's largely taken
 * from the v3 audit.cpp and match.cpp files.
 */
class hashlist : public std::vector<file_data_t *> {
public:
    /**
     * The largest number of columns we can expect in a file of hashes
     * (knowns). Normally this should be the number of hash
     * algorithms plus a column for file size, file name, and, well,
     * some fudge factors. Any values after this number will be
     * ignored. For example, if the user invokes the program as:
     *
     * hashdeep -c md5,md5,md5,md5,...,md5,md5,md5,md5,md5,md5,md5,whirlpool
     *
     * the whirlpool will not be registered.
     */
    static const int MAX_KNOWN_COLUMNS= NUM_ALGORITHMS+ 6;

    /* return codes from loading a hash list */
    typedef enum {
        loadstatus_ok = 0,
        status_unknown_filetype,
        status_contains_bad_hashes,
        status_contains_no_hashes,
        status_file_error
    } loadstatus_t;

    /* results of searching the list for a file's hashes */
    typedef enum {
        searchstatus_ok = 0,

        /* Matching hashes */
        status_match,                   // all hashes match
        status_partial_match,           /* One or more hashes match, but not all */
        status_file_size_mismatch,      /* Implies all hashes match */
        status_file_name_mismatch,      /* Implies all hashes and file size match */
        status_no_match                 /* Implies none of the hashes match */
    } searchstatus_t;
    static const char *searchstatus_to_str(searchstatus_t val);

    // Types of files that contain known hashes
    typedef enum {
        file_plain,
        file_bsd,
        file_hashkeeper,
        file_nsrl_15,
        file_nsrl_20,
        file_encase3,
        file_encase4,
        file_ilook,

        // Files generated by md5deep with the ten digit filesize at the start
        // of each line
        file_md5deep_size,

        file_hashdeep_10,
        file_unknown
    } hashfile_format;

    /** Maps a hex hash string to every file_data_t that carries it. */
    class hashmap : public std::multimap<std::string,file_data_t *> {
    public:
        void add_file(file_data_t *fi,int alg_num);
    };
    hashmap hashmaps[NUM_ALGORITHMS];   // one map per algorithm

    /****************************************************************
     ** Search functions follow
     ** It's not entirely clear why we have two search functions, but we do.
     ** Perhaps one is from md5deep and the other is from hashdeep
     ****************************************************************/

    /**
     * hashlist.cpp
     * find_hash finds the 'best match', which ideally is a match for both the hash and the filename.
     */
    file_data_t *find_hash(hashid_t alg,const std::string &hash_hex,
                           const std::string &file_name,
                           uint64_t file_number);

    /**
     * look up a fdt by hash code(s) and return if it is present or not.
     * optionally return a pointer to it as well.
     */
    searchstatus_t search(const file_data_hasher_t *fdht, file_data_t ** matched, bool case_sensitive) ;

    // return the total matched from all calls to search()
    uint64_t total_matched();

    /****************************************************************/

    /**
     * Figure out the format of a hashlist file and load it.
     * Both of these functions take the file name and the open handle.
     * They read from the handle and just use the filename for printing error messages.
     */
    void enable_hashing_algorithms_from_hashdeep_file(class display *ocb,
                                                      const std::string &fn,std::string val);

    // a string with the algorithms that were enabled last
    std::string last_enabled_algorithms;

    // maps a column number to a hashid;
    // the order columns appear in the file being loaded.
    hashid_t hash_column[NUM_ALGORITHMS];

    // Column number which should contain the filename
    uint8_t filename_column;

    hashfile_format identify_format(class display *ocb,const std::string &fn,FILE *handle);

    // not tstring! always ASCII
    loadstatus_t load_hash_file(class display *ocb,const std::string &fn);

    // send contents to stdout
    void dump_hashlist();

    /**
     * add_fdt adds a file_data_t record to the hashlist, and its hashes to all the hashmaps.
     * @param fi - a file_data_t to add. Don't erase it; we're going to use it (and modify it)
     */
    void add_fdt(file_data_t *fi);
};
/*
 * Primary modes of operation (stored in display::primary_function).
 * The numeric values are spelled out explicitly.
 */
typedef enum {
    primary_compute   = 0,
    primary_match     = 1,
    primary_match_neg = 2,
    primary_audit     = 3
} primary_t;
// These are the types of files that we can match against
#define TYPE_PLAIN 0
#define TYPE_BSD 1
#define TYPE_HASHKEEPER 2
#define TYPE_NSRL_15 3
#define TYPE_NSRL_20 4
#define TYPE_ILOOK 5
#define TYPE_ILOOK3 6
#define TYPE_ILOOK4 7
#define TYPE_MD5DEEP_SIZE 8
#define TYPE_ENCASE 9
#define TYPE_UNKNOWN 254
/*
 * audit mode stats: a set of seven counters, all starting at zero.
 */
class audit_stats {
public:
    audit_stats():
        exact(0),
        expect(0),
        partial(0),
        moved(0),
        unused(0),
        unknown(0),
        total(0){}

    /* For audit mode, the number of each type of file */
    uint64_t exact;
    uint64_t expect;
    uint64_t partial;
    uint64_t moved;
    uint64_t unused;
    uint64_t unknown;
    uint64_t total;

    /** Reset every counter back to zero. */
    void clear(){
        exact = expect = partial = 0;
        moved = unused = unknown = total = 0;
    }
};
/** display describes how information is output.
* There is only one OCB (it is a singleton).
* It needs to be mutex protected.
*
* The hashing happens in lots of threads and then calls the output
* classes in output_control_block to actually do the outputting. The
* problem here is that one of the things that is done is looking up,
* so the searches into "known" and "seen" also need to be
* protected. Hence "known" and "seen" appear in the
* output_control_block, and not elsewhere, and all of the access to
* them needs to be mediated.
*
* It also needs to maintain all of the state for audit mode.
* Finally, it maintains options for reading
* (e.g. buffered, unbuffered, or memory-mapped I/O)
*
* It is a class because it is protected and is passed around.
*/
class display {
private:
mutable mutex_t M; // lock for anything in output section
void lock() const { M.lock(); }
void unlock() const { M.unlock(); }
/* all display state variables are protected by M and must be private */
std::ostream *out; // where things get sent
std::ofstream myoutstream; // if we open it
std::string utf8_banner; // banner to be displayed
bool banner_displayed; // has the header been shown (text output)
XML *dfxml; /* output in DFXML */
/* The set of known values; typically read from the audit file */
hashlist known; // hashes read from the -k file
hashlist seen; // hashes seen on this hashing run; from the command line
class audit_stats match; // for the audit mode
status_t return_code; // prevously returned by hash() and dig().
public:
display():
out(&std::cout),
banner_displayed(0),dfxml(0),
mode_triage(false),
mode_not_matched(false),mode_quiet(false),mode_timestamp(false),
mode_barename(false),
mode_size(false),mode_size_all(false),
opt_silent(false),
opt_verbose(0),
opt_estimate(false),
opt_relative(false),
opt_unicode_escape(false),
opt_mode_match(false),
opt_mode_match_neg(false),
opt_csv(false),
opt_asterisk(false),
opt_zero(false),
opt_display_size(false),
opt_display_hash(false),
opt_show_matched(false),
opt_case_sensitive(true),
opt_readlink(false),
opt_iomode(iomode::buffered), // by default, use buffered
#ifdef HAVE_PTHREAD
opt_threadcount(threadpool::numCPU()),
tp(0),
#else
opt_threadcount(0),
#endif
size_threshold(0),
piecewise_size(0),
primary_function(primary_compute){
}
/* These variables are read-only after threading starts */
bool mode_triage;
bool mode_not_matched;
bool mode_quiet;
bool mode_timestamp;
bool mode_barename;
bool mode_size;
bool mode_size_all;
std::string opt_outfilename;
bool opt_silent;
int opt_verbose;
bool opt_estimate;
bool opt_relative;
bool opt_unicode_escape;
bool opt_mode_match;
bool opt_mode_match_neg;
bool opt_csv;
bool opt_asterisk;
bool opt_zero;
bool opt_display_size;
bool opt_display_hash;
bool opt_show_matched;
bool opt_case_sensitive;
bool opt_readlink;
int opt_iomode;
int opt_threadcount;
#ifdef HAVE_PTHREAD
threadpool *tp;
#endif
// When only hashing files larger/smaller than a given threshold
uint64_t size_threshold;
uint64_t piecewise_size; // non-zero for piecewise mode
primary_t primary_function; /* what do we want to do? */
/* Functions for working */
void set_outfilename(std::string outfilename);
/* Return code support */
int32_t get_return_code(){ lock(); int ret = return_code.get_status(); unlock(); return ret; }
void set_return_code(status_t code){ lock(); return_code = code; unlock(); }
void set_return_code(int32_t code){ lock(); return_code.set(code); unlock(); }
void set_return_code_if_not_ok(status_t code){
lock();
if(code!=status_t::status_ok) return_code = code;
unlock();
}
/* DFXML support */
void xml_open(FILE *out_){
lock();
dfxml = new XML(out_);
unlock();
}
void dfxml_startup(int argc,char **argv);
void dfxml_shutdown();
void dfxml_timeout(const std::string &tag,const timestamp_t &val);
void dfxml_write(file_data_hasher_t *fdht);
/* Known hash database interface */
/* Display the unused files and return the count */
uint64_t compute_unused(bool show_display,std::string annotation);
void set_utf8_banner(std::string utf8_banner_){
utf8_banner = utf8_banner_;
}
static mutex_t portable_gmtime_mutex;
struct tm *portable_gmtime(struct tm *my_time,const timestamp_t *t);
void try_msg(void);
void display_banner_if_needed();
void display_match_result(file_data_hasher_t *fdht,const hash_context_obj *hc);
void md5deep_display_match_result(file_data_hasher_t *fdht,const hash_context_obj *hc);
void md5deep_display_hash(file_data_hasher_t *fdht,const hash_context_obj *hc);
void display_hash(file_data_hasher_t *fdht,const hash_context_obj *hc);
void display_hash_simple(file_data_hasher_t *fdt,const hash_context_obj *hc);
/* The following routines are for printing and outputing filenames.
*
* fmt_filename formats the filename.
* On Windows this version outputs as UTF-8 unless unicode quoting is requested,
* in which case Unicode characters are emited as U+xxxx.
* For example, the Unicode smiley character ☺ is output as U+263A.
*
*/
std::string fmt_size(const file_data_t *fdh) const;
std::string fmt_filename(const std::string &fn) const;
#ifdef _WIN32
std::string fmt_filename(const std::wstring &fn) const;
#endif
std::string fmt_filename(const file_data_t *fdt) const {
return fmt_filename(fdt->file_name);
}
void writeln(std::ostream *s,const std::string &str); // writes a line with NEWLINE and locking
// Display an ordinary message with newline added
void status(const char *fmt, ...) __attribute__((format(printf, 2, 0))); // note that 1 is 'self'
// Display an error message if not in silent mode
void error(const char *fmt, ...) __attribute__((format(printf, 2, 0)));
// Display an error message if not in silent mode and exit
void fatal_error(const char *fmt, ...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__));
// Display an error message, ask user to contact the developer,
void internal_error(const char *fmt, ...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__));
void print_debug(const char *fmt, ...) __attribute__((format(printf, 2, 0)));
void error_filename(const std::string &fn, const char *fmt, ...) __attribute__((format(printf, 3, 0))) ;
#ifdef _WIN32
void error_filename(const std::wstring &fn, const char *fmt, ...) __attribute__((format(printf, 3, 0)));
#endif
/* these versions extract the filename and the annotation if it is present.
*/
/* known hash database and realtime stats.
* Note that this is not locked() and unlocked().
* It can only be run from the main thread before fork.
*/
hashlist::loadstatus_t load_hash_file(const std::string &fn){
hashlist::loadstatus_t ret = known.load_hash_file(this,fn);
return ret;
}
/** These are multi-threaded */
uint64_t known_size() const {
lock();
uint64_t ret= known.size();
unlock();
return ret;
}
const file_data_t *find_hash(hashid_t alg,const std::string &hash_hex,
const std::string &file_name,
uint64_t file_number){
lock();
const file_data_t *ret = known.find_hash(alg,hash_hex,file_name,file_number);
unlock();
return ret;
}
void clear_realtime_stats();
void display_realtime_stats(const file_data_hasher_t *fdht,const hash_context_obj *hc,time_t elapsed);
bool hashes_loaded() const{ lock(); bool ret = known.size()>0; unlock(); return ret; }
void add_fdt(file_data_t *fdt){ lock(); known.add_fdt(fdt); unlock(); }
/* audit mode */
int audit_update(file_data_hasher_t *fdt);
int audit_check(); // performs an audit; return 0 if pass, -1 if fail
void display_audit_results(); // sets return code if fails
void finalize_matching();
/* hash.cpp: Actually trigger the hashing. */
void hash_file(const tstring &file_name, file_types type);
void hash_stdin();
void dump_hashlist(){ lock(); known.dump_hashlist(); unlock(); }
};
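/**
* The 'state' class (declared further down, after the 'global' helper class)
* holds the state of the hashdeep/md5deep program.
* This includes:
* startup parameters
* known - the list of hashes in the hash database.
* seen - the list of hashes that have been seen this time through.
* (known and seen are stored inside the display object, state::ocb.)
*/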
/**
 * A collection of static helpers for paths and for UTF-8 conversion.
 * The class has no data members.
 */
class global {
public:
    // returns the current directory
    static tstring     getcwd();

    // returns the full path
    static tstring     get_realpath(const tstring &fn);

    // returns the full path in UTF-8
    static std::string get_realpath8(const tstring &fn);

    // turns "⦿" to "U+29BF"
    static std::string escape_utf8(const std::string &fn);

#ifdef _WIN32
    static std::string make_utf8(const std::wstring &tfn) ;
#endif
    // The narrow-string overload returns its argument unchanged.
    static std::string make_utf8(const std::string &tfn){
        return tfn;
    }
};
/* On Win32, allow output of wstr's by converting them to UTF-8 */
#ifdef _WIN32
/* Write a wide string to a narrow stream as UTF-8 (via global::make_utf8). */
inline std::ostream & operator <<(std::ostream &os,const std::wstring &wstr) {
    return os << global::make_utf8(wstr);
}
#endif
/**
 * Program-wide state for one hashdeep/md5deep run: which kinds of files
 * are to be hashed, the command line, the output control block (ocb),
 * the field positions used when parsing files of known hashes, and the
 * set of directories already visited.
 *
 * Every scalar member is given a defined value by the constructor;
 * the initializers are listed in member declaration order.
 */
class state {
public:
    state():
        mode_recursive(false),          // do we recurse?
        mode_warn_only(false),          // for loading hash files
        // these determine which files get hashed
        mode_expert(false),
        mode_regular(false),
        mode_directory(false),
        mode_door(false),
        mode_block(false),
        mode_character(false),
        mode_pipe(false),
        mode_socket(false),
        mode_symlink(false),
        mode_winpe(false),
        // command line argument
        argc(0),argv(0),
        // these have something to do with hash files that are loaded
        h_field(0),
        h_plain(0),h_bsd(0),
        h_md5deep_size(0),
        h_hashkeeper(0),h_ilook(0),h_ilook3(0),h_ilook4(0), h_nsrl20(0), h_encase(0),
        usage_count(0),                 // allows -hh to print extra help
        // Previously left uninitialized, so any read before an explicit
        // assignment was undefined behavior.
        // NOTE(review): 'false' is chosen as the neutral value -- confirm
        // against the code that sets/reads this flag.
        opt_enable_mac_cc(false)
    {}

    bool mode_recursive;
    bool mode_warn_only;

    // which files do we hash.
    bool mode_expert;
    bool mode_regular;
    bool mode_directory;
    bool mode_door;
    bool mode_block;
    bool mode_character;
    bool mode_pipe;
    bool mode_socket;
    bool mode_symlink;
    bool mode_winpe;

    /* Command line arguments */
    std::string opt_input_list;         // file with a list of files to read
    int argc;
#ifdef _WIN32
    wchar_t **argv;                     // never allocated, never freed
#else
    char **argv;
#endif

    // configuration and output
    display ocb;                        // output control block

    // Which filetypes this algorithm supports and their position in the file
    uint8_t h_field;                    // which field to extract from a hash file.
    uint8_t h_plain, h_bsd, h_md5deep_size, h_hashkeeper;
    uint8_t h_ilook, h_ilook3, h_ilook4, h_nsrl20, h_encase;

    void md5deep_add_hash(char *h, char *fn);   // explicitly add a hash
    void setup_expert_mode(char *arg);

    /* main.cpp */
    uint64_t find_block_size(std::string input_str);
    int  usage_count;
    bool opt_enable_mac_cc;
    tstring generate_filename(const tstring &input);
    void hashdeep_usage();
    std::string make_banner();
    void md5deep_usage();
    void hashdeep_check_flags_okay();
    void check_wow64();
    void md5deep_check_flags_okay();
    int  hashdeep_process_command_line(int argc,char **argv);
    void md5deep_check_matching_modes();
    void hashdeep_check_matching_modes();
    int  md5deep_process_command_line(int argc,char **argv);
#ifdef _WIN32
    int  prepare_windows_command_line();
#endif

    /* files.cpp
     * Not quite sure what to do with this stuff yet...
     */
    void md5deep_load_match_file(const char *fn);
    int  find_hash_in_line(char *buf, int fileType, char *filename);
    int  parse_encase_file(const char *fn,FILE *f,uint32_t num_expected_hashes);
    int  find_plain_hash(char *buf,char *known_fn);     // returns FALSE if error
    int  find_md5deep_size_hash(char *buf, char *known_fn);
    int  find_bsd_hash(char *buf, char *fn);
    int  find_rigid_hash(char *buf, char *fn, unsigned int fn_location, unsigned int hash_location);
    int  find_ilook_hash(char *buf, char *known_fn);
    int  check_for_encase(FILE *f,uint32_t *expected_hashes);

    /* dig.cpp
     *
     * Note the file typing system needs to be able to display errors...
     */
    class dir_table_t : public std::set<tstring>{
    };
    dir_table_t dir_table;
    void done_processing_dir(const tstring &fn_);
    void processing_dir(const tstring &fn_);
    bool have_processed_dir(const tstring &fn_);

    int  identify_hash_file_type(FILE *f,uint32_t *expected_hashes); // identify the hash file type
    bool should_hash_symlink(const tstring &fn,file_types *link_type);
    bool should_hash_winpe(const tstring &fn);
    bool should_hash_expert(const tstring &fn, file_types type);
    bool should_hash(const tstring &fn, file_types &type);

    /* file_type returns the file type of a string.
     * If an error is found and ocb is provided, send the error to ocb.
     * If filesize and timestamp are provided, give them.
     */
    static file_types file_type(const filename_t &fn,class display *ocb,uint64_t *filesize,
                                timestamp_t *ctime,timestamp_t *mtime,timestamp_t *atime);
#ifdef _WIN32
    bool is_junction_point(const std::wstring &fn);
#endif
    void clean_name_posix(std::string &fn);
    void process_dir(const tstring &path);
    void dig_normal(const tstring &path);       // posix & win32
    void dig_win32(const tstring &path);        // win32 only; calls dig_normal
    static void dig_self_test();

    /** @return whether the output control block holds any known hashes. */
    bool hashes_loaded(){
        return ocb.hashes_loaded();
    }

    int  main(int argc,char **argv);            // main
    void sanity_check(int condition,const char *msg);
};
/**
* the files class knows how to read various hash file types
*/
/* Due to an inadvertent code fork several years ago, this program has different usage
* and output when run as 'md5deep' than when run as 'hashdeep'. We call this the
* 'md5deep_mode' and track it with the variables below.
*/
/* main.cpp */
extern bool md5deep_mode; // if true, then we were run as md5deep, sha1deep, etc.
extern int opt_debug; // for debugging
extern hashid_t opt_md5deep_mode_algorithm; // for when we are in MD5DEEP mode
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems);
std::vector<std::string> split(const std::string &s, char delim);
void lowercase(std::string &s);
extern std::string progname; // formerly const char *__progname
// ------------------------------------------------------------------
// HELPER FUNCTIONS
//
// helper.cpp
// ------------------------------------------------------------------
void chop_line(char *s);
off_t find_file_size(FILE *f,class display *ocb); // Return the size, in bytes of an open file stream. On error, return -1
// ------------------------------------------------------------------
// MAIN PROCESSING
// ------------------------------------------------------------------
/* dig.cpp */
void dig_self_test(); // check the string-processing
#endif /* ifndef __MAIN_H */