Skip to content

Commit

Permalink
Add caption lists (#262)
Browse files Browse the repository at this point in the history
  • Loading branch information
MrHinsh authored Feb 4, 2025
2 parents bf777b1 + 735e933 commit 09dd002
Show file tree
Hide file tree
Showing 1,736 changed files with 43,891 additions and 735 deletions.
13 changes: 11 additions & 2 deletions .powershell/_includes/HugoHelpers.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ function Update-Field {
else {
Write-Host "$fieldName already exists and is not empty"
}
return $frontMatter
return
}

# Determine the position to insert the new field
Expand All @@ -92,7 +92,7 @@ function Update-Field {
}

Write-Host "$fieldName added"
return $frontMatter
return
}

# Update-List function to have the same signature as Update-Field
Expand Down Expand Up @@ -148,6 +148,15 @@ function Update-StringList {
Write-Host "$fieldName already contains all values"
}
}

# Ensure uniqueness while preserving the first occurrence’s casing
$seen = @{}
$frontMatter[$fieldName] = @(
$frontMatter[$fieldName] | Where-Object {
$lower = $_.ToLower()
-not $seen.ContainsKey($lower) -and ($seen[$lower] = $_) # Store the first occurrence's original case
}
)

$frontMatter[$fieldName] = @($frontMatter[$fieldName] | Select-Object -Unique)

Expand Down
19 changes: 19 additions & 0 deletions .powershell/_includes/ResourceHelpers.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,23 @@ function New-ResourceId {
return $resourceId
}

function Get-ResourceType {
    <#
    .SYNOPSIS
        Returns the name of the folder directly beneath "content/resources" in a path.
    .DESCRIPTION
        For "...\content\resources\blog\2025\post" this returns "blog".
        Both "\" and "/" are accepted as separators, and the resource-type folder
        may be the last segment of the path (no trailing separator required).
    .PARAMETER FilePath
        File or directory path to inspect.
    .OUTPUTS
        The resource-type folder name, or $null when the path does not contain
        a "content/resources/<type>" sequence.
    #>
    param (
        [string]$FilePath
    )

    # <sep>content<sep>resources<sep><type>, followed by another separator or end of string.
    $pattern = '[\\/]content[\\/]resources[\\/](?<ResourceType>[^\\/]+)(?:[\\/]|$)'

    if ($FilePath -match $pattern) {
        return $Matches['ResourceType']
    }

    Write-Host "No match found." -ForegroundColor Red
    return $null
}


Write-Host "ResourceHelpers.ps1 loaded" -ForegroundColor Green
138 changes: 138 additions & 0 deletions .powershell/build/Generate-CombinedCaptions.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Helpers
. ./.powershell/_includes/OpenAI.ps1
. ./.powershell/_includes/HugoHelpers.ps1

# Folder scanned by Generate-CombinedCaptions: one sub-folder per video, whose name is used as the video id
$inputDir = "site\content\resources\videos\youtube"
# Folder that receives the combined caption files (and the chunk/index files written by Split-Data)
$outputDir = "site\data"
# Word limit passed to Split-Data as -wordLimit (those calls are currently commented out below)
$maxWordCount = 5000

# Function to split data based on word count and create index
function Split-Data {
    <#
    .SYNOPSIS
        Splits caption items into word-count-limited chunk files and writes an index of them.
    .DESCRIPTION
        Items are appended to the current chunk until adding the next item would push the
        chunk's word count above $wordLimit; the chunk is then saved and a new one started.
        An item that is larger than the limit on its own still gets a chunk to itself.
        Every chunk is written to $outputDir (read from the calling scope) as
        "<baseName>_part<N>.<extension>", and index files listing all chunks are written as
        collection.captions.index.json / .yml.
    .PARAMETER data
        Items to split; each is expected to expose "id" and "captions" properties.
    .PARAMETER baseName
        File name stem used for the chunk files.
    .PARAMETER extension
        "json" writes JSON chunks; any other value writes YAML. Empty means no file extension.
    .PARAMETER wordLimit
        Maximum number of caption words per chunk.
    .NOTES
        The index file names do not include $extension, so calling this function twice
        (e.g. once for yml and once for json) overwrites the index from the first call.
    #>
    param (
        [array]$data,
        [string]$baseName,
        [string]$extension,
        [int]$wordLimit
    )

    $suffix = if ($extension -ne "") { ".$extension" } else { "" }
    $chunk = [System.Collections.Generic.List[object]]::new()
    $currentWordCount = 0
    $chunkIndex = 1
    $indexList = @()

    # Writes one chunk to disk and returns its index entry. A single naming scheme is
    # used for every chunk, including the last one.
    $saveChunk = {
        param ($items, $index, $wordCount, $label)

        $chunkFileName = "${baseName}_part${index}${suffix}"
        $chunkFilePath = Join-Path $outputDir $chunkFileName

        if ($extension -eq "json") {
            # -InputObject with @() keeps the output a JSON array even for a single item.
            ConvertTo-Json -InputObject @($items) -Depth 3 | Set-Content -Path $chunkFilePath -Encoding UTF8
        }
        else {
            $items | ConvertTo-Yaml | Set-Content -Path $chunkFilePath -Encoding UTF8
        }

        Write-Host "Saved chunk: $chunkFilePath with ${label}: $wordCount"
        return @{ part = $index; file = $chunkFileName }
    }

    foreach ($item in $data) {
        $cleanCaptions = ("$($item.captions)" -replace '\s+', ' ').Trim()
        # Splitting an empty string yields one empty element, so count it as zero words explicitly.
        $itemWords = if ($cleanCaptions) { ($cleanCaptions -split ' ').Count } else { 0 }
        Write-Host "Processing item: $($item.id) - Word count: $itemWords"

        if (($currentWordCount + $itemWords) -gt $wordLimit -and $chunk.Count -gt 0) {
            $indexList += & $saveChunk $chunk $chunkIndex $currentWordCount "word count"
            $chunk.Clear()
            $currentWordCount = 0
            $chunkIndex++
        }

        $chunk.Add($item)
        $currentWordCount += $itemWords
    }

    if ($chunk.Count -gt 0) {
        $indexList += & $saveChunk $chunk $chunkIndex $currentWordCount "final word count"
    }

    # Save index files
    $indexJsonPath = Join-Path $outputDir "collection.captions.index.json"
    $indexYamlPath = Join-Path $outputDir "collection.captions.index.yml"

    ConvertTo-Json -InputObject @($indexList) -Depth 3 | Set-Content -Path $indexJsonPath -Encoding UTF8
    $indexList | ConvertTo-Yaml | Set-Content -Path $indexYamlPath -Encoding UTF8

    Write-Host "Saved index JSON: $indexJsonPath"
    Write-Host "Saved index YAML: $indexYamlPath"
}

# Function to generate combined captions in YAML and JSON
function Generate-CombinedCaptions {
    <#
    .SYNOPSIS
        Builds one combined captions collection from every video folder and saves it as YAML and JSON.
    .DESCRIPTION
        For each sub-folder of $inputDir (read from the calling scope) the folder name is used
        as the video id. The title and description are read from the front matter of index.md,
        and the captions from index.captions.en.md. Folders without index.md are skipped;
        folders without a captions file are included with empty captions.
        The result is written to $outputDir as collection.captions.yml and
        collection.captions.json. The JSON file is always an array, including when only
        one video (or none) was found.
    #>
    Write-Host "Starting caption processing..."
    # A List avoids re-allocating the whole array on every append.
    $captionsData = [System.Collections.Generic.List[object]]::new()

    # Iterate through each video folder
    foreach ($videoFolder in Get-ChildItem -Path $inputDir -Directory) {
        $videoDir = $videoFolder.FullName
        $videoId = $videoFolder.Name
        Write-Host "Processing video: $videoId"

        $hugoDataMarkdown = Join-Path $videoDir "index.md"
        if (-not (Test-Path $hugoDataMarkdown)) {
            Write-Host "Warning: Missing index.md for $videoId"
            continue
        }
        $HugoData = Get-HugoMarkdown -Path $hugoDataMarkdown
        Write-Host "Loaded metadata for $videoId"

        $captionsFile = Join-Path $videoDir "index.captions.en.md"
        if (Test-Path $captionsFile) {
            $videoCaptions = Get-Content -Path $captionsFile -Raw
            Write-Host "Loaded captions for $videoId"
        }
        else {
            $videoCaptions = ""
            Write-Host "Warning: No captions found for $videoId"
        }

        $captionsData.Add([PSCustomObject]@{
                id          = $videoId
                title       = $HugoData.FrontMatter.title
                description = $HugoData.FrontMatter.description
                captions    = $videoCaptions
            })
    }

    # Save full YAML file
    $yamlFilePath = Join-Path $outputDir "collection.captions.yml"
    $captionsData | ConvertTo-Yaml | Set-Content -Path $yamlFilePath -Encoding UTF8
    Write-Host "Saved full YAML file: $yamlFilePath"

    # Save full JSON file. Passing the array via -InputObject (rather than the pipeline)
    # keeps the output a JSON array regardless of how many items there are.
    $jsonFilePath = Join-Path $outputDir "collection.captions.json"
    ConvertTo-Json -InputObject @($captionsData) -Depth 3 | Set-Content -Path $jsonFilePath -Encoding UTF8
    Write-Host "Saved full JSON file: $jsonFilePath"

    # Split into word count-based chunks and generate index files
    #Write-Host "Splitting YAML file into word-based chunks..."
    #Split-Data -data $captionsData -baseName "collection.captions" -extension "yml" -wordLimit $maxWordCount

    #Write-Host "Splitting JSON file into word-based chunks..."
    #Split-Data -data $captionsData -baseName "collection.captions" -extension "json" -wordLimit $maxWordCount

    Write-Host "Caption processing completed."
}

# Entry point: build the combined caption files when this script is run
Generate-CombinedCaptions
2 changes: 1 addition & 1 deletion .powershell/build/Update-YoutubeChannelData.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ $refreshData = $false

$videoUpdateLimit = 50
$maxYoutubeSearchResults = 1000
$maxYoutubeDataAgeHours = 48
$maxYoutubeDataAgeHours = 300

$captionsManafestUpdateLimit = 50
$captionsDownloadLimit = 25
Expand Down
5 changes: 4 additions & 1 deletion .powershell/build/Update-YoutubeMarkdownFiles.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ function Update-YoutubeMarkdownFiles {
# Remove consecutive dashes
$urlSafeTitle = $urlSafeTitle -replace '-+', '-'

$aliases = @("/resources/videos/$videoId", "/resources/videos/$urlSafeTitle", "/resources/$urlSafeTitle")
$aliases = @("/resources/$videoId", "/resources/videos/$videoId", "/resources/videos/$urlSafeTitle", "/resources/$urlSafeTitle")

# Get the tags from the snippet and filter out excluded tags
$tags = @()
Expand Down Expand Up @@ -131,6 +131,9 @@ function Update-YoutubeMarkdownFiles {

Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'videoId' -fieldValue $videoId
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceId' -fieldValue $videoId -addAfter "date"
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceType' -fieldValue "video" -addAfter "ResourceId"
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImport' -fieldValue $true -addAfter 'ResourceType'
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportSource' -fieldValue "Youtube" -addAfter 'ResourceImport'
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'source' -fieldValue $source -addAfter "videoId"
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'url' -fieldValue "/resources/videos/:slug"
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'slug' -fieldValue $urlSafeTitle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
. ./.powershell/_includes/ResourceHelpers.ps1

# Iterate through each blog folder and update markdown files
$outputDir = "site\content\resources\blog\2025"
$outputDir = "site\content\resources\blog\"

# Get every index.md under the blog folder (the "first 10" limit is currently commented out)
$blogs = Get-ChildItem -Path $outputDir -Recurse -Filter "index.md" #| Select-Object -First 10
Expand Down Expand Up @@ -38,7 +38,8 @@ $blogs | ForEach-Object {
}

if ($hugoMarkdown.FrontMatter.Contains("id")) {
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImport' -fieldValue $true -addAfter 'ResourceId'
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceType' -fieldValue "blogpost" -addAfter 'ResourceId'
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImport' -fieldValue $true -addAfter 'ResourceType'
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportSource' -fieldValue "Wordpress" -addAfter 'ResourceImport'
If (([datetime]$hugoMarkdown.FrontMatter.date) -lt ([datetime]'2011-02-16')) {
Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportOriginalSource' -fieldValue "GeeksWithBlogs" -addAfter 'ResourceImportSource'
Expand Down
99 changes: 99 additions & 0 deletions .powershell/single-use/resources/Update-ReourcesFrontMatter.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Helpers
. ./.powershell/_includes/OpenAI.ps1
. ./.powershell/_includes/HugoHelpers.ps1
. ./.powershell/_includes/ResourceHelpers.ps1

# Iterate through the resource folders and update the front matter of each index.md
$outputDir = "site\content\resources\"

# Get the index.md files and select the first 10
$resources = Get-ChildItem -Path $outputDir -Recurse -Filter "index.md" | Select-Object -First 10

$resources | ForEach-Object {
    # Use the FileInfo properties directly instead of round-tripping through Get-Item.
    $markdownFile = $_.FullName
    $resourceDir = $_.DirectoryName
    Write-Host "--------------------------------------------------------"
    Write-Host "Processing post: $resourceDir"

    if (-not (Test-Path $markdownFile)) {
        Write-Host "Skipping folder: $resourceDir (missing index.md)"
        return   # inside ForEach-Object this moves on to the next file
    }

    # Load markdown as HugoMarkdown object
    $hugoMarkdown = Get-HugoMarkdown -Path $markdownFile

    if (-not $hugoMarkdown.FrontMatter.description) {
        # Generate a new description using OpenAI, based on the resource's own title.
        # NOTE(review): if the HugoMarkdown object exposes the body text, consider adding it
        # to the prompt for better results - confirm the property name in HugoHelpers.ps1.
        $prompt = "Generate a concise, engaging description of no more than 160 characters for the following resource: '$($hugoMarkdown.FrontMatter.title)'."
        $description = Get-OpenAIResponse -Prompt $prompt
        # Update the description in the front matter
        Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'description' -fieldValue $description -addAfter 'title'
    }

    # Resolve the resource id: existing ResourceId, else videoId, else a newly generated id.
    $ResourceId = $null
    if ($hugoMarkdown.FrontMatter.Contains("ResourceId")) {
        $ResourceId = $hugoMarkdown.FrontMatter.ResourceId
    }
    elseif ($hugoMarkdown.FrontMatter.Contains("videoId")) {
        $ResourceId = $hugoMarkdown.FrontMatter.videoId
    }
    else {
        $ResourceId = New-ResourceId
        Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceId' -fieldValue $ResourceId -addAfter 'description'
    }
    $ResourceType = Get-ResourceType -FilePath $resourceDir

    $aliases = @()
    $aliasesFor404 = @()
    switch ($ResourceType) {
        "blog" {
            Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImport' -fieldValue $hugoMarkdown.FrontMatter.Contains("ResourceImportId") -addAfter 'ResourceId' -Overwrite
            Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportSource' -fieldValue "Wordpress" -addAfter 'ResourceImport'
            # Posts dated before 2011-02-16 are attributed to GeeksWithBlogs, later ones to Wordpress.
            if (([datetime]$hugoMarkdown.FrontMatter.date) -lt ([datetime]'2011-02-16')) {
                Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportOriginalSource' -fieldValue "GeeksWithBlogs" -addAfter 'ResourceImportSource'
            }
            else {
                Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportOriginalSource' -fieldValue "Wordpress" -addAfter 'ResourceImportSource'
            }

            $slug = $hugoMarkdown.FrontMatter.slug
            if ($hugoMarkdown.FrontMatter.Contains("slug")) {
                $aliases += "/$slug"
                $aliasesFor404 += "/$slug"
                $aliases += "/blog/$slug"
                $aliasesFor404 += "/blog/$slug"
            }
            if ($hugoMarkdown.FrontMatter.Contains("title")) {
                # Consecutive dashes are collapsed, as Update-YoutubeMarkdownFiles.ps1 does for its slugs.
                $urlSafeTitle = ($hugoMarkdown.FrontMatter.title -replace '[:\/\\*?"<>|#%.!,]', '-' -replace '\s+', '-' -replace '-+', '-').ToLower()
                # Only add title-based aliases when they differ from the slug-based ones.
                if ($urlSafeTitle -ne $slug) {
                    $aliases += "/$urlSafeTitle"
                    $aliasesFor404 += "/$urlSafeTitle"
                    $aliases += "/blog/$urlSafeTitle"
                    $aliasesFor404 += "/blog/$urlSafeTitle"
                }
            }
        }
        "podcast" {
            # No podcast-specific handling yet.
        }
        "videos" {
            # No video-specific handling yet.
        }
        default {
        }
    }

    # Always add the resolved resource id as an alias, including when it came from videoId
    # and the front matter therefore has no ResourceId field of its own.
    if ($ResourceId) {
        $aliases += "/resources/$ResourceId"
    }
    Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'aliases' -values $aliases -addAfter 'slug'
    if ($aliasesFor404.Count -gt 0) {
        Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'aliasesFor404' -values $aliasesFor404 -addAfter 'aliases'
    }

    Save-HugoMarkdown -hugoMarkdown $hugoMarkdown -Path $markdownFile
}

Write-Host "All markdown files processed."
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Ahaaaa!
description: Join Martin Hinshelwood as he navigates the challenges of coding in his blog post 'Ahaaaa!' and shares his journey to mastering the art of blogging.
ResourceId: E3cUOS0i5ul
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 469
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Custom UI colour scheme for Windows Forms .NET
description: Learn how to customize your Windows Forms UI with a unique color scheme using .NET. Enhance your application's look effortlessly with our step-by-step guide!
ResourceId: 2vSr2gsP4Rt
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 466
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Adding ToolStripPanel UI Adapter Support to the Composite UI Application Block
description: Learn how to enhance the Composite UI Application Block with ToolStripPanel support. Discover code solutions and best practices for UI adaptation.
ResourceId: y_vlPh65_5Q
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 467
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: CafeMSN Prize
description: Discover how I won Amazon vouchers from Microsoft twice through CafeMSN! Join me in celebrating this fun achievement and learn more about my experience.
ResourceId: lDSK1w8_5AB
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 465
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: The most usefull .NET tool on the face of the planet!
description: Discover Reflector, the ultimate .NET tool that enhances your coding experience. Unlock the power of .NET with this essential resource for developers!
ResourceId: 5AaOgf__tP8
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 468
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Windows Communication Framework Evaluation
description: Explore the evaluation of Windows Communication Framework (WCF) and its revolutionary features for web service projects. Discover its benefits for your company!
ResourceId: piNenAHpFYq
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 464
ResourceImportSource: Wordpress
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Web 2.0
description: Explore the essence of Web 2.0 and its impact on the internet landscape. Understand this buzzword and enhance your project discussions with confidence!
ResourceId: phKPK_P0xjA
ResourceType: blogpost
ResourceImport: true
ResourceImportId: 463
ResourceImportSource: Wordpress
Expand Down
Loading

0 comments on commit 09dd002

Please sign in to comment.