Skip to content

Feature: Automatically detect zip encoding #17045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 22, 2025
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<PackageVersion Include="Microsoft.Graphics.Win2D" Version="1.3.2" />
<PackageVersion Include="TagLibSharp" Version="2.3.0" />
<PackageVersion Include="Tulpep.ActiveDirectoryObjectPicker" Version="3.0.11" />
<PackageVersion Include="UTF.Unknown" Version="2.5.1" />
<PackageVersion Include="WinUIEx" Version="2.5.1" />
<PackageVersion Include="Vanara.Windows.Extensions" Version="4.0.1" />
<PackageVersion Include="Vanara.Windows.Shell" Version="4.0.1" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ public override async Task ExecuteAsync(object? parameter = null)

var isArchiveEncrypted = await FilesystemTasks.Wrap(() => StorageArchiveService.IsEncryptedAsync(archive.Path));
var isArchiveEncodingUndetermined = await FilesystemTasks.Wrap(() => StorageArchiveService.IsEncodingUndeterminedAsync(archive.Path));
Encoding? detectedEncoding = null;
if (isArchiveEncodingUndetermined)
{
detectedEncoding = await FilesystemTasks.Wrap(() => StorageArchiveService.DetectEncodingAsync(archive.Path));
}
var password = string.Empty;
Encoding? encoding = null;

Expand All @@ -51,7 +56,8 @@ public override async Task ExecuteAsync(object? parameter = null)
{
IsArchiveEncrypted = isArchiveEncrypted,
IsArchiveEncodingUndetermined = isArchiveEncodingUndetermined,
ShowPathSelection = true
ShowPathSelection = true,
DetectedEncoding = detectedEncoding,
};
decompressArchiveDialog.ViewModel = decompressArchiveViewModel;

Expand Down
9 changes: 8 additions & 1 deletion src/Files.App/Data/Contracts/IStorageArchiveService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,17 @@ public interface IStorageArchiveService
/// <summary>
/// Gets the value that indicates whether the archive file's encoding is undetermined.
/// </summary>
/// <param name="archiveFilePath">The archive file path to check if the item is encrypted.</param>
/// <param name="archiveFilePath">The archive file path to check if the encoding is undetermined.</param>
/// <returns>True if the archive file's encoding is undetermined; otherwise, false.</returns>
Task<bool> IsEncodingUndeterminedAsync(string archiveFilePath);

/// <summary>
/// Detects the encoding of a zip archive file whose encoding is undetermined.
/// </summary>
/// <param name="archiveFilePath">The path of the archive file whose encoding should be detected.</param>
/// <returns>The detected encoding; or null if the archive file does not need encoding detection or its encoding cannot be detected.</returns>
Task<Encoding?> DetectEncodingAsync(string archiveFilePath);

/// <summary>
/// Gets the <see cref="SevenZipExtractor"/> instance from the archive file path.
/// </summary>
Expand Down
43 changes: 41 additions & 2 deletions src/Files.App/Data/Items/EncodingItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public sealed class EncodingItem
/// Initializes a new instance of the <see cref="EncodingItem"/> class.
/// </summary>
/// <param name="code">The code of the language.</param>
public EncodingItem(string code)
public EncodingItem(string? code)
{
if (string.IsNullOrEmpty(code))
{
Expand All @@ -36,6 +36,45 @@ public EncodingItem(string code)
}
}

public override string ToString() => Name;
/// <summary>
/// Initializes a new instance of the <see cref="EncodingItem"/> class from an existing encoding and an explicit display name.
/// </summary>
/// <param name="encoding">The encoding this item represents.</param>
/// <param name="name">The name of this item; this is the text returned by <see cref="ToString"/>.</param>
public EncodingItem(Encoding encoding, string name)
    => (Encoding, Name) = (encoding, name);

/// <summary>
/// The default list of selectable encodings: the system default, UTF-8,
/// the Windows code pages listed below, and "macintosh".
/// </summary>
/// <remarks>
/// The field is read-only so that the shared list cannot be replaced by other code.
/// </remarks>
public static readonly EncodingItem[] Defaults = new string?[] {
    null, // System Default
    "UTF-8",

    // All possible Windows system encodings
    // reference: https://en.wikipedia.org/wiki/Windows_code_page

    // East Asian
    "shift_jis", // Japanese
    "gb2312", // Simplified Chinese
    "big5", // Traditional Chinese
    "ks_c_5601-1987", // Korean

    // Southeast Asian
    "Windows-1258", // Vietnamese
    "Windows-874", // Thai

    // Middle East
    "Windows-1256", // Arabic
    "Windows-1255", // Hebrew
    "Windows-1254", // Turkish

    // European
    "Windows-1252", // Western European
    "Windows-1250", // Central European
    "Windows-1251", // Cyrillic
    "Windows-1253", // Greek
    "Windows-1257", // Baltic

    "macintosh",
}
.Select(x => new EncodingItem(x))
.ToArray();

public override string ToString() => Name;
}
}
1 change: 1 addition & 0 deletions src/Files.App/Files.App.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
<PackageReference Include="Microsoft.Graphics.Win2D" />
<PackageReference Include="TagLibSharp" />
<PackageReference Include="Tulpep.ActiveDirectoryObjectPicker" />
<PackageReference Include="UTF.Unknown" />
<PackageReference Include="WinUIEx" />
<PackageReference Include="Vanara.Windows.Extensions" />
<PackageReference Include="Vanara.Windows.Shell" />
Expand Down
130 changes: 84 additions & 46 deletions src/Files.App/Services/Storage/StorageArchiveService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
using ICSharpCode.SharpZipLib.Zip;
using SevenZip;
using System.IO;
using System.Linq;
using System.Text;
using UtfUnknown;
using Windows.Storage;
using Windows.Win32;

Expand Down Expand Up @@ -90,7 +90,8 @@ public async Task<bool> CompressAsync(ICompressArchiveModel compressionModel)
/// <inheritdoc/>
public Task<bool> DecompressAsync(string archiveFilePath, string destinationFolderPath, string password = "", Encoding? encoding = null)
{
if(encoding == null){
if (encoding == null)
{
return DecompressAsyncWithSevenZip(archiveFilePath, destinationFolderPath, password);
}
else
Expand Down Expand Up @@ -203,22 +204,22 @@ async Task<bool> DecompressAsyncWithSharpZipLib(string archiveFilePath, string d
string.IsNullOrEmpty(destinationFolderPath))
return false;
using var zipFile = new ZipFile(archiveFilePath, StringCodec.FromEncoding(encoding));
if(zipFile is null)
if (zipFile is null)
return false;
if(!string.IsNullOrEmpty(password))

if (!string.IsNullOrEmpty(password))
zipFile.Password = password;

// Initialize a new in-progress status card
var statusCard = StatusCenterHelper.AddCard_Decompress(
archiveFilePath.CreateEnumerable(),
destinationFolderPath.CreateEnumerable(),
ReturnResult.InProgress);

// Check if the decompress operation canceled
if (statusCard.CancellationToken.IsCancellationRequested)
return false;

StatusCenterItemProgressModel fsProgress = new(
statusCard.ProgressEventSource,
enumerationCompleted: true,
Expand All @@ -233,51 +234,52 @@ async Task<bool> DecompressAsyncWithSharpZipLib(string archiveFilePath, string d
{
long processedBytes = 0;
int processedFiles = 0;

foreach (ZipEntry zipEntry in zipFile)
await Task.Run(async () =>
{
if (statusCard.CancellationToken.IsCancellationRequested)
foreach (ZipEntry zipEntry in zipFile)
{
isSuccess = false;
break;
}

if (!zipEntry.IsFile)
{
continue; // Ignore directories
}
if (statusCard.CancellationToken.IsCancellationRequested)
{
isSuccess = false;
break;
}

string entryFileName = zipEntry.Name;
string fullZipToPath = Path.Combine(destinationFolderPath, entryFileName);
string directoryName = Path.GetDirectoryName(fullZipToPath);
if (!zipEntry.IsFile)
{
continue; // Ignore directories
}

if (!Directory.Exists(directoryName))
{
Directory.CreateDirectory(directoryName);
}
string entryFileName = zipEntry.Name;
string fullZipToPath = Path.Combine(destinationFolderPath, entryFileName);
string directoryName = Path.GetDirectoryName(fullZipToPath);

byte[] buffer = new byte[4096]; // 4K is a good default
using (Stream zipStream = zipFile.GetInputStream(zipEntry))
using (FileStream streamWriter = File.Create(fullZipToPath))
{
await ThreadingService.ExecuteOnUiThreadAsync(() =>
if (!Directory.Exists(directoryName))
{
fsProgress.FileName = entryFileName;
fsProgress.Report();
});
Directory.CreateDirectory(directoryName);
}

StreamUtils.Copy(zipStream, streamWriter, buffer);
}
processedBytes += zipEntry.Size;
if (fsProgress.TotalSize > 0)
{
fsProgress.Report(processedBytes / (double)fsProgress.TotalSize * 100);
byte[] buffer = new byte[4096]; // 4K is a good default
using (Stream zipStream = zipFile.GetInputStream(zipEntry))
using (FileStream streamWriter = File.Create(fullZipToPath))
{
await ThreadingService.ExecuteOnUiThreadAsync(() =>
{
fsProgress.FileName = entryFileName;
fsProgress.Report();
});

StreamUtils.Copy(zipStream, streamWriter, buffer);
}
processedBytes += zipEntry.Size;
if (fsProgress.TotalSize > 0)
{
fsProgress.Report(processedBytes / (double)fsProgress.TotalSize * 100);
}
processedFiles++;
fsProgress.AddProcessedItemsCount(1);
fsProgress.Report();
}
processedFiles++;
fsProgress.AddProcessedItemsCount(1);
fsProgress.Report();
}

});
if (!statusCard.CancellationToken.IsCancellationRequested)
{
isSuccess = true;
Expand Down Expand Up @@ -321,7 +323,7 @@ await ThreadingService.ExecuteOnUiThreadAsync(() =>
return isSuccess;
}


/// <inheritdoc/>
public string GenerateArchiveNameFromItems(IReadOnlyList<ListedItem> items)
{
Expand Down Expand Up @@ -355,7 +357,7 @@ public async Task<bool> IsEncodingUndeterminedAsync(string archiveFilePath)
{
using (ZipFile zipFile = new ZipFile(archiveFilePath))
{
return !zipFile.Cast<ZipEntry>().All(entry=>entry.IsUnicodeText);
return !zipFile.Cast<ZipEntry>().All(entry => entry.IsUnicodeText);
}
}
catch (Exception ex)
Expand All @@ -365,6 +367,42 @@ public async Task<bool> IsEncodingUndeterminedAsync(string archiveFilePath)
}
}

/// <inheritdoc/>
public async Task<Encoding?> DetectEncodingAsync(string archiveFilePath)
{
    if (string.IsNullOrEmpty(archiveFilePath))
        return null;

    // Opening the archive and running the charset detector are synchronous operations,
    // so they are offloaded instead of running on the caller's thread.
    return await Task.Run(() => DetectEncodingCore(archiveFilePath));
}

// Reads the non-Unicode entry names of the archive and asks the charset detector for their encoding.
// Returns null when there is nothing to detect, when the detection is not confident enough, or on any error.
private static Encoding? DetectEncodingCore(string archiveFilePath)
{
    // A detection result is only accepted when its confidence is above this value.
    const double MinimumConfidence = 0.5;

    try
    {
        // SharpZipLib requires an encoding when decoding entry names.
        // Code page 437 maps every byte value to a character, so any byte array can be
        // stored as a cp437 string and converted back to the original bytes losslessly.
        // It is resolved inside the try block because GetEncoding throws when the code page is unavailable.
        var cp437 = Encoding.GetEncoding(437);

        using var zipFile = new ZipFile(archiveFilePath, StringCodec.FromEncoding(cp437));

        var undeterminedNames = zipFile.Cast<ZipEntry>()
            .Where(e => !e.IsUnicodeText)
            .Select(e => e.Name)
            .ToArray();

        // Every entry name is flagged as Unicode text; there is nothing to detect.
        if (undeterminedNames.Length == 0)
            return null;

        // Convert the cp437 strings back to the raw name bytes stored in the archive.
        var fileNameBytes = cp437.GetBytes(string.Join("\n", undeterminedNames));
        var detected = CharsetDetector.DetectFromBytes(fileNameBytes).Detected;

        return detected is not null && detected.Confidence > MinimumConfidence
            ? detected.Encoding
            : null;
    }
    catch (Exception ex)
    {
        Console.WriteLine($"SharpZipLib error: {ex.Message}");
        return null;
    }
}

/// <inheritdoc/>
public async Task<SevenZipExtractor?> GetSevenZipExtractorAsync(string archiveFilePath, string password = "")
{
Expand Down
3 changes: 3 additions & 0 deletions src/Files.App/Strings/en-US/Resources.resw
Original file line number Diff line number Diff line change
Expand Up @@ -2099,6 +2099,9 @@
<data name="Encoding" xml:space="preserve">
<value>Encoding</value>
</data>
<data name="EncodingDetected" xml:space="preserve">
<value>{0} (detected)</value>
</data>
<data name="ExtractToPath" xml:space="preserve">
<value>Path</value>
</data>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ public bool IsArchiveEncodingUndetermined
set => SetProperty(ref isArchiveEncodingUndetermined, value);
}

// Backing field for DetectedEncoding.
private Encoding? detectedEncoding;

/// <summary>
/// Gets or sets the encoding that was detected for the archive; null when there is none.
/// </summary>
/// <remarks>
/// Every assignment calls RefreshEncodingOptions, whether or not the value actually changed.
/// </remarks>
public Encoding? DetectedEncoding
{
    get
    {
        return detectedEncoding;
    }
    set
    {
        SetProperty(ref detectedEncoding, value);

        // Rebuild the option list and the selection on every assignment.
        RefreshEncodingOptions();
    }
}

private bool showPathSelection;
public bool ShowPathSelection
{
Expand All @@ -53,19 +63,27 @@ public bool ShowPathSelection

public DisposableArray? Password { get; private set; }

public EncodingItem[] EncodingOptions { get; set; } = new string?[] {
null,//System Default
"UTF-8",
"shift_jis",
"gb2312",
"big5",
"ks_c_5601-1987",
"Windows-1252",
"macintosh",
}
.Select(x=>new EncodingItem(x))
.ToArray();
public EncodingItem[] EncodingOptions { get; set; } = EncodingItem.Defaults;
public EncodingItem SelectedEncoding { get; set; }
/// <summary>
/// Rebuilds <see cref="EncodingOptions"/> from <see cref="EncodingItem.Defaults"/>,
/// placing an entry for the detected encoding first when there is one,
/// and then selects the first option.
/// </summary>
private void RefreshEncodingOptions()
{
    if (detectedEncoding == null)
    {
        // Nothing was detected: offer the defaults as they are.
        EncodingOptions = EncodingItem.Defaults;
    }
    else
    {
        // Label the detected encoding with the localized "EncodingDetected" format string.
        var detectedLabel = string.Format(Strings.EncodingDetected.GetLocalizedResource(), detectedEncoding.EncodingName);
        var detectedItem = new EncodingItem(detectedEncoding, detectedLabel);

        EncodingOptions = EncodingItem.Defaults.Prepend(detectedItem).ToArray();
    }

    SelectedEncoding = EncodingOptions.FirstOrDefault();
}



public IRelayCommand PrimaryButtonClickCommand { get; private set; }

Expand Down
Loading