Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/rdmp 34 chi ingress #1688

Closed
wants to merge 76 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
c3e1ce6
add chi column finder
JFriel Nov 27, 2023
aa43931
basic redaction commands
JFriel Nov 27, 2023
405f076
add start of mutilator
JFriel Nov 27, 2023
35735ae
basic working dl
JFriel Nov 28, 2023
0b4b4cb
basic ui
JFriel Nov 28, 2023
50fd26f
make start on ui
JFriel Nov 28, 2023
f235070
basic ui
JFriel Nov 28, 2023
6a30d94
fix namespace
JFriel Nov 28, 2023
44fb9be
remove hicplugin reference
JFriel Nov 29, 2023
37746fa
fix file documentation
JFriel Nov 29, 2023
ae8116e
fix up tests
JFriel Nov 29, 2023
3ceb878
use correct list
JFriel Nov 29, 2023
5a8eeeb
add file extention
JFriel Nov 29, 2023
f0466e3
add ignore strings
JFriel Nov 29, 2023
6cf5173
change visability
JFriel Nov 29, 2023
b6a2848
add missing headers
JFriel Nov 29, 2023
1967a73
update unit test
JFriel Nov 29, 2023
4a7ad53
add creation db code
JFriel Nov 29, 2023
63e2e2e
try no params
JFriel Nov 29, 2023
8ccde9d
update test
JFriel Nov 29, 2023
bdea723
temp
JFriel Nov 30, 2023
15e90db
update redacted chi
JFriel Nov 30, 2023
b910bac
updated data load
JFriel Dec 4, 2023
e9cce51
working restore
JFriel Dec 4, 2023
135288f
working identify
JFriel Dec 4, 2023
0857816
fix mutilator
JFriel Dec 5, 2023
3d0a741
working redaction
JFriel Dec 5, 2023
99f660e
readd command execution
JFriel Dec 5, 2023
02fc8c2
working redaction
JFriel Dec 5, 2023
e938313
start of the dqe
JFriel Dec 5, 2023
4528aee
document files
JFriel Dec 5, 2023
efa7312
add rdmp all ignore
JFriel Dec 5, 2023
fa89202
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Dec 11, 2023
da567f7
attempt to fix sql
JFriel Dec 11, 2023
6a7e640
dont redact pks
JFriel Dec 11, 2023
1245852
add early return
JFriel Dec 11, 2023
c950bae
working confirm all
JFriel Dec 12, 2023
9916d7f
tidy up ui
JFriel Dec 12, 2023
d5d9468
update kn issue
JFriel Dec 12, 2023
61a0bf8
make a start on tests
JFriel Dec 12, 2023
90643ca
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Dec 13, 2023
020c11f
add find tests
JFriel Dec 13, 2023
65c1500
redact chi test
JFriel Dec 13, 2023
22dbd14
update tests
JFriel Dec 13, 2023
ab0ad56
update tests
JFriel Dec 13, 2023
efafd0e
remove prohibited word
JFriel Dec 13, 2023
87a66d3
tidy up code
JFriel Dec 13, 2023
eabe955
comment out test for build testing purposes
JFriel Dec 13, 2023
8307a73
update tests
JFriel Dec 14, 2023
d5520b8
working test
JFriel Dec 14, 2023
fa38a4d
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Dec 15, 2023
cbad896
add no results found label
JFriel Dec 15, 2023
6919c17
tidy up from codeql
JFriel Dec 15, 2023
707bf8e
add missing file
JFriel Dec 15, 2023
ce1d347
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Dec 18, 2023
2c36e42
fix csproj
JFriel Dec 18, 2023
d75eddc
fix build
JFriel Dec 18, 2023
886010e
improved multi dataset catalogue
JFriel Dec 18, 2023
1c87027
start to dedupe code
JFriel Dec 18, 2023
40c6218
add summaries
JFriel Dec 18, 2023
bae18fb
tidy up code
JFriel Dec 18, 2023
196e83c
add missed elper useage
JFriel Dec 18, 2023
d1cb089
fix overcorrection
JFriel Dec 18, 2023
775bcaf
remove pipes
JFriel Dec 18, 2023
f79312e
Merge branch 'develop' into feature/RDMP-34-chi-ingress
JFriel Dec 19, 2023
f55e42e
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Jan 3, 2024
df1f918
Dispose to using fixups
Jan 3, 2024
b4e4160
Variable scope fix
Jan 3, 2024
701e484
Variable scope fix 2
Jan 3, 2024
266c13a
Merge branch 'develop' into feature/RDMP-34-chi-ingress
JFriel Jan 10, 2024
e9edfce
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Jan 22, 2024
b8f2372
Merge branch 'develop' into feature/RDMP-34-chi-ingress
JFriel Feb 1, 2024
8914090
Merge branch 'develop' into feature/RDMP-34-chi-ingress
JFriel Feb 5, 2024
5613b5a
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Mar 27, 2024
d538d68
Merge branch 'develop' of https://github.com/HicServices/RDMP into fe…
JFriel Apr 30, 2024
ebc9e41
update ui
JFriel Apr 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using HICPlugin.Curation.Data;
using Rdmp.Core.Curation.Data;

namespace Rdmp.Core.CommandExecution.AtomicCommands;

/// <summary>
/// Confirms a CHI redaction by deleting its <see cref="RedactedCHI"/> record from the database.
/// The command does not touch the redacted data itself; it only removes the record.
/// </summary>
public class ExecuteCommandConfirmRedactedCHI : BasicCommandExecution, IAtomicCommand
{
    private readonly RedactedCHI _redactedCHI;

    /// <summary>
    /// Creates the command.
    /// </summary>
    /// <param name="activator">Activator passed through to the base command.</param>
    /// <param name="redaction">The redaction record that will be deleted when the command executes.</param>
    public ExecuteCommandConfirmRedactedCHI(IBasicActivateItems activator, [DemandsInitialization("redaction to confirm")] RedactedCHI redaction) : base(activator)
    {
        _redactedCHI = redaction;
    }

    /// <summary>
    /// Runs the base execution checks and then deletes the redaction record.
    /// </summary>
    public override void Execute()
    {
        base.Execute();
        _redactedCHI.DeleteInDatabase();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

using Rdmp.Core.CommandExecution;
using Rdmp.Core.CommandExecution.AtomicCommands;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.DataFlowPipeline;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using YamlDotNet.Serialization;

namespace Rdmp.Core.CommandExecution.AtomicCommands;

/// <summary>
/// Reads every (non allow-listed) column of a catalogue from its live database and records each value for
/// which <c>CHIColumnFinder.GetPotentialCHI</c> returns a non-blank result. Findings are collected in
/// <see cref="foundChis"/> and also written to the console.
/// </summary>
public class ExecuteCommandIdentifyCHIInCatalogue : BasicCommandExecution, IAtomicCommand
{
    // Allow-list key whose columns are skipped for every catalogue
    private const string AllCataloguesKey = "RDMP_ALL";
    private const string PotentialChiColumn = "Potential CHI";
    private const string ContextColumn = "Context";
    private const string SourceColumn = "Source Column Name";

    private readonly ICatalogue _catalouge;
    private readonly IBasicActivateItems _activator;
    private readonly bool _bailOutEarly;
    private readonly Dictionary<string, List<string>> _allowLists = new();

    /// <summary>
    /// One row per finding: the potential CHI, a shortened context around it and the name of the
    /// catalogue item it came from. Columns are only created once the first finding is recorded.
    /// </summary>
    public DataTable foundChis = new();

    /// <summary>
    /// Creates the command and, when a location is supplied, loads the allow list from a YAML file.
    /// </summary>
    /// <param name="activator">Activator passed through to the base command.</param>
    /// <param name="catalogue">The catalogue whose columns are searched.</param>
    /// <param name="bailOutEarly">When true the search stops as soon as one potential CHI has been recorded.</param>
    /// <param name="allowListLocation">
    /// Optional path to a YAML file mapping a catalogue name (or "RDMP_ALL") to a list of column names to skip.
    /// </param>
    public ExecuteCommandIdentifyCHIInCatalogue(IBasicActivateItems activator, [DemandsInitialization("The catalogue to search")] ICatalogue catalogue, bool bailOutEarly = false, string allowListLocation = null) : base(activator)
    {
        _catalouge = catalogue;
        _activator = activator;
        _bailOutEarly = bailOutEarly;

        if (string.IsNullOrWhiteSpace(allowListLocation))
        {
            return;
        }

        var allowListFileContent = File.ReadAllText(allowListLocation);
        var deserializer = new DeserializerBuilder().Build();
        var yamlObject = deserializer.Deserialize<Dictionary<string, List<string>>>(allowListFileContent);

        // An empty document can deserialize to null; treat it as "no allow list"
        if (yamlObject is null)
        {
            return;
        }

        foreach (var (cat, columns) in yamlObject)
        {
            // A key without any entries yields a null list, which would break AddRange/UnionWith later
            _allowLists.Add(cat, columns ?? new List<string>());
        }
    }

    /// <summary>
    /// Returns <paramref name="chi"/> surrounded by up to <paramref name="padding"/> characters of the text
    /// that precedes and follows its first occurrence in <paramref name="source"/>.
    /// </summary>
    /// <param name="chi">The text to locate.</param>
    /// <param name="source">The full value the text was found in.</param>
    /// <param name="padding">Maximum number of characters kept on each side; negative values are treated as 0.</param>
    /// <returns>
    /// The shortened context, or <paramref name="chi"/> on its own when it does not occur verbatim in
    /// <paramref name="source"/>.
    /// </returns>
    public static string WrapCHIInContext(string chi, string source, int padding = 25)
    {
        padding = Math.Max(0, padding);

        // Ordinal so the returned index always lines up with chi.Length
        var foundIndex = source.IndexOf(chi, StringComparison.Ordinal);
        if (foundIndex < 0)
        {
            // Nothing to slice around; previously this path threw ArgumentOutOfRangeException
            return chi;
        }

        var prefixStart = Math.Max(0, foundIndex - padding);
        var suffixStart = foundIndex + chi.Length;
        var suffixEnd = Math.Min(suffixStart + padding, source.Length);
        return $"{source[prefixStart..foundIndex]}{chi}{source[suffixStart..suffixEnd]}";
    }

    /// <summary>
    /// Appends one finding to <see cref="foundChis"/>, creating the result columns on first use.
    /// </summary>
    private void handleFoundCHI(string foundChi, string contextValue, string columnName)
    {
        // Key off the columns rather than the rows so a cleared table does not get duplicate columns
        if (foundChis.Columns.Count == 0)
        {
            foundChis.Columns.Add(PotentialChiColumn);
            foundChis.Columns.Add(ContextColumn);
            foundChis.Columns.Add(SourceColumn);
        }

        var shrunkContext = WrapCHIInContext(foundChi, contextValue);
        foundChis.Rows.Add(foundChi, shrunkContext, columnName);
    }

    /// <summary>
    /// Selects each catalogue item's column from the live server, checks every value for a potential CHI
    /// and finally prints a summary of the findings to the console.
    /// </summary>
    public override void Execute()
    {
        base.Execute();

        var columnAllowList = new HashSet<string>();
        if (_allowLists.TryGetValue(AllCataloguesKey, out var globalAllowances))
        {
            columnAllowList.UnionWith(globalAllowances);
        }

        if (_allowLists.TryGetValue(_catalouge.Name, out var catalogueAllowances))
        {
            columnAllowList.UnionWith(catalogueAllowances);
        }

        foreach (var item in _catalouge.CatalogueItems)
        {
            if (_bailOutEarly && foundChis.Rows.Count > 0)
            {
                break;
            }

            if (columnAllowList.Contains(item.Name))
            {
                continue;
            }

            // Defensive: skip items that have no column behind them or whose name is not "table.column"
            var column = item.ColumnInfo?.Name;
            if (string.IsNullOrWhiteSpace(column))
            {
                continue;
            }

            var idxOfLastSplit = column.LastIndexOf('.');
            if (idxOfLastSplit <= 0)
            {
                continue;
            }

            var columnName = column[(idxOfLastSplit + 1)..];
            var server = _catalouge.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, false);
            var sql = $"SELECT {columnName} from {column[..idxOfLastSplit]}";

            using var dt = new DataTable();
            dt.BeginLoadData();
            using (var con = server.GetConnection())
            using (var cmd = server.GetCommand(sql, con))
            using (var da = server.GetDataAdapter(cmd))
            {
                da.Fill(dt);
            }
            dt.EndLoadData();

            foreach (DataRow row in dt.Rows)
            {
                var value = row[0].ToString();
                var potentialCHI = CHIColumnFinder.GetPotentialCHI(value);
                if (string.IsNullOrWhiteSpace(potentialCHI))
                {
                    continue;
                }

                handleFoundCHI(potentialCHI, value, item.Name);
                if (_bailOutEarly)
                {
                    break;
                }
            }
        }

        Console.WriteLine($"Found {foundChis.Rows.Count} CHIs in the {_catalouge.Name} Catalogue.");
        foreach (DataRow row in foundChis.Rows)
        {
            Console.WriteLine($"{row[PotentialChiColumn]} | {row[ContextColumn]} | {row[SourceColumn]}");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
using Rdmp.Core.CommandExecution.AtomicCommands;
using Rdmp.Core.CommandExecution;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Rdmp.Core.Curation.Data;
using YamlDotNet.Serialization;
using System.IO;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using System.Data;
using static NPOI.HSSF.Util.HSSFColor;
using Rdmp.Core.Curation.Data.Defaults;
using TB.ComponentModel;
using FAnsi.Discovery.TableCreation;
using HICPlugin.Curation.Data;
using Rdmp.Core.DataFlowPipeline;

namespace Rdmp.Core.CommandExecution.AtomicCommands;

/// <summary>
/// Reads every (non allow-listed) column of a catalogue from its live database and, for each value in which
/// <c>CHIColumnFinder.GetPotentialCHI</c> reports a potential CHI, stores a <see cref="RedactedCHI"/> record
/// and overwrites the value in the live table with a <c>REDACTED_CHI_{ID}</c> placeholder.
/// </summary>
public class ExecuteCommandRedactCHIsFromCatalogue : BasicCommandExecution, IAtomicCommand
{
    // Allow-list key whose columns are skipped for every catalogue
    private const string AllCataloguesKey = "RDMP_ALL";

    private readonly ICatalogue _catalouge;
    private readonly IBasicActivateItems _activator;
    private readonly Dictionary<string, List<string>> _allowLists = new();

    /// <summary>Number of redaction records created by this command instance.</summary>
    public int redactionCount = 0;

    /// <summary>
    /// Creates the command and, when a location is supplied, loads the allow list from a YAML file.
    /// </summary>
    /// <param name="activator">Activator; also used to reach the catalogue repository when saving redactions.</param>
    /// <param name="catalogue">The catalogue whose columns are searched and redacted.</param>
    /// <param name="allowListLocation">
    /// Optional path to a YAML file mapping a catalogue name (or "RDMP_ALL") to a list of column names to skip.
    /// </param>
    public ExecuteCommandRedactCHIsFromCatalogue(IBasicActivateItems activator, [DemandsInitialization("The catalogue to search")] ICatalogue catalogue, string allowListLocation = null) : base(activator)
    {
        _catalouge = catalogue;
        _activator = activator;

        if (string.IsNullOrWhiteSpace(allowListLocation))
        {
            return;
        }

        var allowListFileContent = File.ReadAllText(allowListLocation);
        var deserializer = new DeserializerBuilder().Build();
        var yamlObject = deserializer.Deserialize<Dictionary<string, List<string>>>(allowListFileContent);

        // An empty document can deserialize to null; treat it as "no allow list"
        if (yamlObject is null)
        {
            return;
        }

        foreach (var (cat, columns) in yamlObject)
        {
            // A key without any entries yields a null list, which would break UnionWith later
            _allowLists.Add(cat, columns ?? new List<string>());
        }
    }

    /// <summary>
    /// Adds a named parameter to <paramref name="cmd"/> so values never have to be spliced into SQL text.
    /// </summary>
    private static void AddParameter(IDbCommand cmd, string name, object value)
    {
        var parameter = cmd.CreateParameter();
        parameter.ParameterName = name;
        parameter.Value = value ?? DBNull.Value;
        cmd.Parameters.Add(parameter);
    }

    /// <summary>
    /// Saves a <see cref="RedactedCHI"/> for the finding and replaces the CHI in the live table with a
    /// placeholder that carries the record's ID.
    /// </summary>
    /// <param name="foundChi">The potential CHI that was detected.</param>
    /// <param name="table">Qualified name of the table being updated.</param>
    /// <param name="column">Name of the column being updated.</param>
    /// <param name="columnValue">The complete original cell value containing the CHI.</param>
    private void handleFoundCHI(string foundChi, string table, string column, string columnValue)
    {
        Console.WriteLine("Found CHI!");
        redactionCount++;

        var rc = new RedactedCHI(_activator.RepositoryLocator.CatalogueRepository, foundChi, ExecuteCommandIdentifyCHIInCatalogue.WrapCHIInContext(foundChi, columnValue, 20), $"{table}.{column}");
        rc.SaveToDatabase();

        var redactedValue = columnValue.Replace(foundChi, $"REDACTED_CHI_{rc.ID}");

        // TODO: could be smarter about how we write to the db, e.g. share a connection across updates.
        // Values are passed as parameters: cell contents are free text and may contain quotes.
        // NOTE(review): the '@' parameter prefix suits SQL Server/MySQL/PostgreSQL; confirm if Oracle must be supported.
        var sql = $"UPDATE {table} SET {column}=@redactedValue where {column}=@originalValue";
        var server = _catalouge.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, false);

        using var conn = server.GetConnection();
        conn.Open();
        using var cmd = server.GetCommand(sql, conn);
        AddParameter(cmd, "@redactedValue", redactedValue);
        AddParameter(cmd, "@originalValue", columnValue);
        cmd.ExecuteNonQuery();
    }

    /// <summary>
    /// Selects each catalogue item's column from the live server and redacts every value in which a
    /// potential CHI is found.
    /// </summary>
    public override void Execute()
    {
        base.Execute();

        var columnAllowList = new HashSet<string>();
        if (_allowLists.TryGetValue(AllCataloguesKey, out var globalAllowances))
        {
            columnAllowList.UnionWith(globalAllowances);
        }

        if (_allowLists.TryGetValue(_catalouge.Name, out var catalogueAllowances))
        {
            columnAllowList.UnionWith(catalogueAllowances);
        }

        foreach (var item in _catalouge.CatalogueItems)
        {
            if (columnAllowList.Contains(item.Name))
            {
                continue;
            }

            // Defensive: skip items that have no column behind them or whose name is not "table.column"
            var column = item.ColumnInfo?.Name;
            if (string.IsNullOrWhiteSpace(column))
            {
                continue;
            }

            var idxOfLastSplit = column.LastIndexOf('.');
            if (idxOfLastSplit <= 0)
            {
                continue;
            }

            var table = column[..idxOfLastSplit];
            var columnName = column[(idxOfLastSplit + 1)..];
            var server = _catalouge.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, false);
            var sql = $"SELECT {columnName} from {table}";

            using var dt = new DataTable();
            dt.BeginLoadData();
            using (var con = server.GetConnection())
            using (var cmd = server.GetCommand(sql, con))
            using (var da = server.GetDataAdapter(cmd))
            {
                da.Fill(dt);
            }
            dt.EndLoadData();

            // NOTE(review): the UPDATE matches on the whole cell value, so identical values in later rows
            // still create their own RedactedCHI record even though the first update already changed them.
            foreach (DataRow row in dt.Rows)
            {
                var value = row[0].ToString();
                var potentialCHI = CHIColumnFinder.GetPotentialCHI(value);
                if (!string.IsNullOrWhiteSpace(potentialCHI))
                {
                    handleFoundCHI(potentialCHI, table, columnName, value);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using HICPlugin.Curation.Data;
using Microsoft.Data.SqlClient;
using Rdmp.Core.Curation.Data;
using Rdmp.Core.ReusableLibraryCode.DataAccess;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Rdmp.Core.CommandExecution.AtomicCommands;

/// <summary>
/// Reverts a redaction: finds values in the live table that contain the <c>REDACTED_CHI_{ID}</c> placeholder
/// of the given <see cref="RedactedCHI"/>, puts the original CHI back and then deletes the redaction record.
/// </summary>
public class ExecuteCommandRevertRedactedCHI : BasicCommandExecution, IAtomicCommand
{
    private readonly RedactedCHI _redactedCHI;
    private readonly IBasicActivateItems _activator;

    /// <summary>
    /// Creates the command.
    /// </summary>
    /// <param name="activator">Activator; used to look up the <see cref="ColumnInfo"/> the redaction refers to.</param>
    /// <param name="redaction">The redaction to undo.</param>
    public ExecuteCommandRevertRedactedCHI(IBasicActivateItems activator, [DemandsInitialization("Redacted CHI to Revert")] RedactedCHI redaction) : base(activator)
    {
        _redactedCHI = redaction;
        _activator = activator;
    }

    /// <summary>
    /// Restores the original CHI in the live table and deletes the redaction record.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the stored location is not of the form "table.column", or no column/catalogue matches it.
    /// </exception>
    public override void Execute()
    {
        base.Execute();

        var location = _redactedCHI.CHILocation;
        var splitidx = location.LastIndexOf('.');
        if (splitidx <= 0)
        {
            throw new InvalidOperationException($"CHI location '{location}' is not in the form table.column");
        }

        var table = location[..splitidx];
        var column = location[(splitidx + 1)..];

        var columnInfo = _activator.RepositoryLocator.CatalogueRepository.GetAllObjects<ColumnInfo>().FirstOrDefault(ci => ci.Name == location)
                         ?? throw new InvalidOperationException($"No ColumnInfo found with the name '{location}'");
        var catalogue = columnInfo.CatalogueItems.FirstOrDefault()?.Catalogue
                        ?? throw new InvalidOperationException($"ColumnInfo '{location}' is not linked to any Catalogue");

        var placeholder = $"REDACTED_CHI_{_redactedCHI.ID}";

        // '_' is a single-character wildcard in LIKE, so bracket it to match a literal underscore (T-SQL syntax)
        var likePattern = $"%{placeholder.Replace("_", "[_]")}%";

        // The lookahead stops the placeholder for ID 1 from matching inside REDACTED_CHI_12
        var placeholderPattern = new System.Text.RegularExpressions.Regex(
            System.Text.RegularExpressions.Regex.Escape(placeholder) + @"(?!\d)");

        using (var con = (SqlConnection)catalogue.GetDistinctLiveDatabaseServer(DataAccessContext.InternalDataProcessing, false).GetConnection())
        {
            con.Open();

            using var existingResultsDT = new DataTable();
            using (var findCmd = new SqlCommand($"select {column} from {table} where {column} like @pattern", con))
            {
                findCmd.Parameters.AddWithValue("@pattern", likePattern);
                using var da = new SqlDataAdapter(findCmd);
                da.Fill(existingResultsDT);
            }

            // Each distinct value needs only one UPDATE because the UPDATE matches on the whole value
            var handledValues = new HashSet<string>();
            foreach (DataRow row in existingResultsDT.Rows)
            {
                var currentContext = row[0].ToString();
                if (!handledValues.Add(currentContext) || !placeholderPattern.IsMatch(currentContext))
                {
                    continue;
                }

                // Evaluator overload so the CHI is inserted literally (no '$' substitution handling)
                var newContext = placeholderPattern.Replace(currentContext, m => _redactedCHI.PotentialCHI);

                using var updateCmd = new SqlCommand($"update {table} set {column}=@newValue where {column}=@currentValue", con);
                updateCmd.Parameters.AddWithValue("@newValue", newContext);
                updateCmd.Parameters.AddWithValue("@currentValue", currentContext);
                updateCmd.ExecuteNonQuery();
            }

            // As before, the record is removed whether or not a placeholder was still present in the table
            _redactedCHI.DeleteInDatabase();
        }
    }
}
23 changes: 23 additions & 0 deletions Rdmp.Core/Curation/Data/IRedactedCHI.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using FAnsi.Naming;
using Rdmp.Core.Curation.Data.Cohort;
using Rdmp.Core.MapsDirectlyToDatabaseTable;
using Rdmp.Core.Repositories;
using Rdmp.Core.ReusableLibraryCode.Checks;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HICPlugin.Curation.Data;

/// <summary>
/// Read-only view of a stored record describing a potential CHI and where it was found.
/// </summary>
public interface IRedactedCHI : IMapsDirectlyToDatabaseTable
{
    /// <summary>The catalogue repository associated with this record.</summary>
    ICatalogueRepository CatalogueRepository { get; }

    /// <summary>The potential CHI text this record refers to.</summary>
    string PotentialCHI { get; }

    /// <summary>
    /// Context stored alongside the CHI.
    /// NOTE(review): presumably a short excerpt of the surrounding text - confirm against the implementing class.
    /// </summary>
    string CHIContext { get; }

    /// <summary>
    /// Where the CHI was found.
    /// NOTE(review): appears to be a qualified "table.column" name - confirm against the implementing class.
    /// </summary>
    string CHILocation { get; }
}
Loading