
Commit 58fdf25

Fix issue TobiasPSP#11
1 parent 635fc55 commit 58fdf25

2 files changed (+82 −82 lines)

Diff for: PSOneTools/2.4/Find-PSOneDuplicateFile.ps1

+36 −36
@@ -9,11 +9,11 @@
 
       .EXAMPLE
       $Path = [Environment]::GetFolderPath('MyDocuments')
-      Find-PSOneDuplicateFile -Path $Path
+      Find-PSOneDuplicateFile -Path $Path
       Find duplicate files in the user documents folder
 
       .EXAMPLE
-      Find-PSOneDuplicateFile -Path c:\windows -Filter *.log
+      Find-PSOneDuplicateFile -Path c:\windows -Filter *.log
       find log files in the Windows folder with duplicate content
 
       .LINK
@@ -27,35 +27,35 @@
     [String]
     [Parameter(Mandatory)]
     $Path,
-
-    # Filter to apply. Default is '*' (all Files)
+
+    # Filter to apply. Default is '*' (all Files)
     [String]
     $Filter = '*'
   )
 
   # get a hashtable of all files of size greater 0
   # grouped by their length
-
-
+
+
   # ENUMERATE ALL FILES RECURSIVELY
   # call scriptblocks directly and pipe them together
   # this is by far the fastest way and much faster than
   # using Foreach-Object:
-  & {
+  & {
     try
     {
       # try and use the fast API way of enumerating files recursively
       # this FAILS whenever there is any "Access Denied" errors
       Write-Progress -Activity 'Acquiring Files' -Status 'Fast Method'
-      [IO.DirectoryInfo]::new($Path).GetFiles('*', 'AllDirectories')
+      [IO.DirectoryInfo]::new($Path).GetFiles($Filter, 'AllDirectories')
     }
     catch
     {
       # use PowerShell's own (slow) way of enumerating files if any error occurs:
       Write-Progress -Activity 'Acquiring Files' -Status 'Falling Back to Slow Method'
       Get-ChildItem -Path $Path -File -Recurse -ErrorAction Ignore
     }
-  } |
+  } |
   # EXCLUDE EMPTY FILES:
   # use direct process blocks with IF (which is much faster than Where-Object):
   & {
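Note: the functional change for issue TobiasPSP#11 is the GetFiles() line in the hunk above. The fast .NET enumeration path previously passed a hard-coded '*' and therefore ignored the -Filter parameter; it now passes $Filter, so filtering also works on the fast path. Most of the remaining +/− pairs in this commit show identical text and appear to be whitespace-only cleanups. A minimal sketch of the behavioral difference (the path, pattern and variable names below are illustrative and not part of the commit):

$Path = [Environment]::GetFolderPath('MyDocuments')

# before the fix: the fast path enumerated every file, ignoring -Filter
# (this call can still throw on "Access Denied"; the script then falls back to Get-ChildItem)
$all  = [IO.DirectoryInfo]::new($Path).GetFiles('*', 'AllDirectories')

# after the fix: the wildcard supplied via -Filter reaches the API call
$logs = [IO.DirectoryInfo]::new($Path).GetFiles('*.log', 'AllDirectories')

'{0} files total, {1} matching *.log' -f $all.Count, $logs.Count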
@@ -68,37 +68,37 @@
         $_
       }
     }
-  } |
+  } |
   # GROUP FILES BY LENGTH, AND RETURN ONLY FILES WHERE THERE IS AT LEAST ONE
   # OTHER FILE WITH SAME SIZE
   # use direct scriptblocks with own hashtable (which is much faster than Group-Object)
-  & {
-    begin
+  & {
+    begin
     # start with an empty hashtable
-    { $hash = @{} }
+    { $hash = @{} }
 
-    process
-    {
+    process
+    {
       # group files by their length
       # (use "length" as hashtable key)
       $file = $_
       $key = $file.Length.toString()
-
+
       # if we see this key for the first time, create a generic
       # list to hold group items, and store FileInfo objects in this list
       # (specialized generic lists are faster than ArrayList):
-      if ($hash.ContainsKey($key) -eq $false)
+      if ($hash.ContainsKey($key) -eq $false)
       {
         $hash[$key] = [Collections.Generic.List[System.IO.FileInfo]]::new()
       }
       # add file to appropriate hashtable key:
       $hash[$key].Add($file)
-    }
-
-    end
-    {
+    }
+
+    end
+    {
       # return only the files from groups with at least two files
-      # (if there is only one file with a given length, then it
+      # (if there is only one file with a given length, then it
       # cannot have any duplicates for sure):
       foreach($pile in $hash.Values)
       {
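The comments in this hunk note that grouping with a plain hashtable of generic lists is much faster than Group-Object. For reference, a rough sketch of the same "keep only sizes that occur more than once" step written with the slower built-in cmdlets; $files stands in for the FileInfo objects coming down the pipeline and is not part of the commit:

# equivalent idea expressed with Group-Object / Where-Object (slower):
$files |
  Group-Object -Property Length |     # one group per distinct file size
  Where-Object { $_.Count -gt 1 } |   # keep sizes shared by at least two files
  ForEach-Object { $_.Group }         # emit the grouped FileInfo objects again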
@@ -109,8 +109,8 @@
           $pile
         }
       }
-    }
-  } |
+    }
+  } |
   # CALCULATE THE NUMBER OF FILES TO HASH
   # collect all files and hand over en-bloc
   & {
@@ -119,58 +119,58 @@
   # GROUP FILES BY HASH, AND RETURN ONLY HASHES THAT HAVE AT LEAST TWO FILES:
   # use a direct scriptblock call with a hashtable (much faster than Group-Object):
   & {
-    begin
+    begin
     {
       # start with an empty hashtable
       $hash = @{}
-
+
       # since this is a length procedure, a progress bar is in order
       # keep a counter of processed files:
       $c = 0
     }
-
+
     process
     {
       $totalNumber = $_.Count
      foreach($file in $_)
       {
-
+
         # update progress bar
         $c++
-
+
         # update progress bar every 20 files:
         if ($c % 20 -eq 0)
         {
           $percentComplete = $c * 100 / $totalNumber
           Write-Progress -Activity 'Hashing File Content' -Status $file.Name -PercentComplete $percentComplete
         }
-
+
         # use the file hash of this file PLUS file length as a key to the hashtable
         # use the fastest algorithm SHA1
         $result = Get-FileHash -Path $file.FullName -Algorithm SHA1
         $key = '{0}:{1}' -f $result.Hash, $file.Length
-
+
         # if we see this key the first time, add a generic list to this key:
         if ($hash.ContainsKey($key) -eq $false)
         {
           $hash.Add($key, [Collections.Generic.List[System.IO.FileInfo]]::new())
         }
-
+
         # add the file to the approriate group:
         $hash[$key].Add($file)
       }
     }
-
+
     end
     {
       # remove all hashtable keys with only one file in them
-
+
       # first, CLONE the list of hashtable keys
       # (we cannot remove hashtable keys while enumerating the live
       # keys list):
       # remove keys
       $keys = @($hash.Keys).Clone()
-
+
       # enumerate all keys...
       foreach($key in $keys)
       {
@@ -180,7 +180,7 @@
           $hash.Remove($key)
         }
       }
-
+
       # return the hashtable with only duplicate files left:
       $hash
     }
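Reading the result: the pipeline ends by returning the hashtable built above, so every key combines a SHA1 hash with a file length ('hash:length') and every value is the list of files sharing that exact content. A rough usage sketch, assuming the PSOneTools module (or this script) is already loaded; the variable names are illustrative:

$Path = [Environment]::GetFolderPath('MyDocuments')
$duplicates = Find-PSOneDuplicateFile -Path $Path -Filter *.log

foreach ($key in $duplicates.Keys)
{
  # $key looks like '<SHA1>:<length>'; the value is a list of identical files
  '{0} identical copies:' -f $duplicates[$key].Count
  $duplicates[$key].FullName
}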
