-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1,736 changed files
with
43,891 additions
and
735 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
# Helpers | ||
. ./.powershell/_includes/OpenAI.ps1 | ||
. ./.powershell/_includes/HugoHelpers.ps1 | ||
|
||
# Folder scanned by Generate-CombinedCaptions: each sub-folder is treated as one video.
$inputDir = 'site\content\resources\videos\youtube'
# Folder that receives the generated collection.captions.* files.
$outputDir = 'site\data'
# Word budget per chunk file; passed to Split-Data as -wordLimit (those calls are currently commented out).
$maxWordCount = 5000
|
||
# Function to split data based on word count and create index
function Split-Data {
    <#
    .SYNOPSIS
    Splits caption records into word-count-limited chunk files and writes an index of them.

    .DESCRIPTION
    Walks $data in order and accumulates items until adding the next one would push the
    running word count past $wordLimit; the accumulated items are then written to
    "<baseName>_part<N>[.<extension>]" and a new chunk is started. Items are never split,
    so a single item larger than the limit ends up in a chunk of its own.
    After the last chunk is written, an index (part number + file name per chunk) is saved
    as collection.captions.index.json and collection.captions.index.yml.

    All files are written under $outputDir, which is read from the calling scope.

    .PARAMETER data
    Items to distribute over chunks. The "captions" property of each item is used for the
    word count and "id" is used for logging.

    .PARAMETER baseName
    File name prefix for the chunk files.

    .PARAMETER extension
    File extension without the dot. "json" serializes with ConvertTo-Json; any other value
    serializes with ConvertTo-Yaml. An empty string produces file names without extension.

    .PARAMETER wordLimit
    Maximum number of caption words per chunk (exceeded only by a single oversized item).

    .NOTES
    ConvertTo-Yaml is not defined in this file; presumably it comes from a YAML module or
    a dot-sourced helper -- TODO confirm.
    The index file names are fixed, so a second call (e.g. for another extension)
    overwrites the index written by the first.
    #>
    param (
        [array]$data,
        [string]$baseName,
        [string]$extension,
        [int]$wordLimit
    )

    $suffix = if ($extension -ne "") { ".$extension" } else { "" }

    # Writes one chunk to disk and returns its index entry. Shared by the in-loop flush and
    # the final flush so both use the same "_part<N>" naming that the index refers to.
    $saveChunk = {
        param (
            [object[]]$items,
            [int]$part,
            [int]$wordCount,
            [string]$countLabel
        )
        $chunkFileName = "${baseName}_part$part$suffix"
        $chunkFilePath = "$outputDir\$chunkFileName"

        if ($extension -eq "json") {
            $items | ConvertTo-Json -Depth 3 | Set-Content -Path $chunkFilePath -Encoding UTF8
        }
        else {
            $items | ConvertTo-Yaml | Set-Content -Path $chunkFilePath -Encoding UTF8
        }

        Write-Host "Saved chunk: $chunkFilePath with ${countLabel}: $wordCount"
        @{ part = $part; file = $chunkFileName }
    }

    $chunk = @()
    $currentWordCount = 0
    $chunkIndex = 1
    $indexList = @()

    foreach ($item in $data) {
        $cleanCaptions = ($item.captions -replace "\s+", " ").Trim()
        # Splitting an empty string still yields one element, so empty captions are counted explicitly as 0.
        $itemWords = if ($cleanCaptions) { ($cleanCaptions -split '\s+').Count } else { 0 }
        # $(...) is required: "$item.id" would expand the whole object followed by a literal ".id".
        Write-Host "Processing item: $($item.id) - Word count: $itemWords"

        # Flush before adding the item that would overflow; never flush an empty chunk.
        if (($currentWordCount + $itemWords) -gt $wordLimit -and $chunk.Count -gt 0) {
            $indexList += (& $saveChunk -items $chunk -part $chunkIndex -wordCount $currentWordCount -countLabel "word count")

            $chunk = @()
            $currentWordCount = 0
            $chunkIndex++
        }

        $chunk += $item
        $currentWordCount += $itemWords
    }

    # Write whatever is left after the last item.
    if ($chunk.Count -gt 0) {
        $indexList += (& $saveChunk -items $chunk -part $chunkIndex -wordCount $currentWordCount -countLabel "final word count")
    }

    # Save index files
    $indexJsonPath = "$outputDir\collection.captions.index.json"
    $indexYamlPath = "$outputDir\collection.captions.index.yml"

    $indexList | ConvertTo-Json -Depth 3 | Set-Content -Path $indexJsonPath -Encoding UTF8
    $indexList | ConvertTo-Yaml | Set-Content -Path $indexYamlPath -Encoding UTF8

    Write-Host "Saved index JSON: $indexJsonPath"
    Write-Host "Saved index YAML: $indexYamlPath"
}
|
||
# Function to generate combined captions in YAML and JSON
function Generate-CombinedCaptions {
    <#
    .SYNOPSIS
    Builds one record per video folder and saves the whole collection as YAML and JSON.

    .DESCRIPTION
    For every directory directly under $inputDir the folder name is used as the video id.
    Folders without an index.md are skipped with a warning. Title and description are read
    from the front matter returned by Get-HugoMarkdown; captions are the raw content of
    index.captions.en.md, or an empty string when that file does not exist.
    The records are written to collection.captions.yml and collection.captions.json
    under $outputDir. $inputDir and $outputDir are read from the calling scope.
    #>
    Write-Host "Starting caption processing..."

    # Each loop iteration emits at most one record; @() keeps the result an array even for 0 or 1 records.
    $captionsData = @(
        foreach ($folder in Get-ChildItem -Path $inputDir -Directory) {
            $videoDir = $folder.FullName
            $videoId = $folder.Name
            Write-Host "Processing video: $videoId"

            # Metadata is mandatory: skip the folder when index.md is absent.
            $hugoDataMarkdown = "$videoDir\index.md"
            if (-not (Test-Path $hugoDataMarkdown)) {
                Write-Host "Warning: Missing index.md for $videoId"
                continue
            }
            $HugoData = Get-HugoMarkdown -Path $hugoDataMarkdown
            Write-Host "Loaded metadata for $videoId"

            # Captions are optional: fall back to an empty string.
            $captionsFile = "$videoDir\index.captions.en.md"
            if (-not (Test-Path $captionsFile)) {
                $videoCaptions = ""
                Write-Host "Warning: No captions found for $videoId"
            }
            else {
                $videoCaptions = Get-Content -Path $captionsFile -Raw
                Write-Host "Loaded captions for $videoId"
            }

            [PSCustomObject]@{
                id          = $videoId
                title       = $HugoData.FrontMatter.title
                description = $HugoData.FrontMatter.description
                captions    = $videoCaptions
            }
        }
    )

    # Save full YAML file
    $yamlFilePath = "$outputDir\collection.captions.yml"
    $captionsData | ConvertTo-Yaml | Set-Content -Path $yamlFilePath -Encoding UTF8
    Write-Host "Saved full YAML file: $yamlFilePath"

    # Save full JSON file
    $jsonFilePath = "$outputDir\collection.captions.json"
    $captionsData | ConvertTo-Json -Depth 3 | Set-Content -Path $jsonFilePath -Encoding UTF8
    Write-Host "Saved full JSON file: $jsonFilePath"

    # Word-count-based chunking is currently disabled; re-enable the calls below to produce chunk and index files.
    #Write-Host "Splitting YAML file into word-based chunks..."
    #Split-Data -data $captionsData -baseName "collection.captions" -extension "yml" -wordLimit $maxWordCount

    #Write-Host "Splitting JSON file into word-based chunks..."
    #Split-Data -data $captionsData -baseName "collection.captions" -extension "json" -wordLimit $maxWordCount

    Write-Host "Caption processing completed."
}

Generate-CombinedCaptions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
99 changes: 99 additions & 0 deletions
99
.powershell/single-use/resources/Update-ReourcesFrontMatter.ps1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Helpers | ||
. ./.powershell/_includes/OpenAI.ps1 | ||
. ./.powershell/_includes/HugoHelpers.ps1 | ||
. ./.powershell/_includes/ResourceHelpers.ps1 | ||
|
||
# Iterate through each resource folder and update the front matter of its index.md:
#   - generate a description when none is present
#   - make sure a ResourceId is available (existing ResourceId, else videoId, else a new id)
#   - for blog resources, record import metadata and build URL aliases
$outputDir = "site\content\resources\"

# Get the index.md files below the resources folder and select the first 10.
# @() keeps the result an array so that an empty result does not pipe a single $null into the loop.
$resources = @(Get-ChildItem -Path $outputDir -Recurse -Filter "index.md" | Select-Object -First 10)

$resources | ForEach-Object {
    # $_ is the file object for index.md; use its properties directly instead of re-resolving it.
    $resourceDir = $_.DirectoryName
    $markdownFile = $_.FullName
    Write-Host "--------------------------------------------------------"
    Write-Host "Processing post: $resourceDir"
    if ((Test-Path $markdownFile)) {

        # Load markdown as HugoMarkdown object
        $hugoMarkdown = Get-HugoMarkdown -Path $markdownFile

        if (-not $hugoMarkdown.FrontMatter.description) {
            # Generate a new description using OpenAI, based on the resource's own title.
            # NOTE(review): only the title is sent; if the HugoMarkdown object exposes the page body,
            # including it would give the model more context -- confirm the property name in HugoHelpers.ps1.
            $prompt = "Generate a concise, engaging description of no more than 160 characters for the following resource: '$($hugoMarkdown.FrontMatter.title)'."
            $description = Get-OpenAIResponse -Prompt $prompt
            # Update the description in the front matter
            Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'description' -fieldValue $description -addAfter 'title'
        }

        # Resolve the resource id; only a newly generated id is written back to the front matter.
        $ResourceId = $null
        if ($hugoMarkdown.FrontMatter.Contains("ResourceId")) {
            $ResourceId = $hugoMarkdown.FrontMatter.ResourceId
        }
        elseif ($hugoMarkdown.FrontMatter.Contains("videoId")) {
            $ResourceId = $hugoMarkdown.FrontMatter.videoId
        }
        else {
            $ResourceId = New-ResourceId
            Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceId' -fieldValue $ResourceId -addAfter 'description'
        }
        $ResourceType = Get-ResourceType -FilePath $resourceDir

        $aliases = @()
        $404aliases = @()
        switch ($ResourceType) {
            "blog" {
                # ResourceImport is true when the post carries a ResourceImportId.
                Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImport' -fieldValue ($hugoMarkdown.FrontMatter.Contains("ResourceImportId")) -addAfter 'ResourceId' -Overwrite
                Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportSource' -fieldValue "Wordpress" -addAfter 'ResourceImport'
                # Posts dated before 2011-02-16 are attributed to GeeksWithBlogs, later ones to Wordpress.
                If (([datetime]$hugoMarkdown.FrontMatter.date) -lt ([datetime]'2011-02-16')) {
                    Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportOriginalSource' -fieldValue "GeeksWithBlogs" -addAfter 'ResourceImportSource'
                }
                else {
                    Update-Field -frontMatter $hugoMarkdown.FrontMatter -fieldName 'ResourceImportOriginalSource' -fieldValue "Wordpress" -addAfter 'ResourceImportSource'
                }
                # Slug-based aliases, with and without the /blog prefix.
                if ($hugoMarkdown.FrontMatter.Contains("slug")) {
                    $slug = $hugoMarkdown.FrontMatter.slug
                    $aliases += "/$slug"
                    $404aliases += "/$slug"
                    $aliases += "/blog/$slug"
                    $404aliases += "/blog/$slug"
                }
                # Title-based aliases, added only when the URL-safe title differs from the slug.
                if ($hugoMarkdown.FrontMatter.Contains("title")) {
                    $slug = $hugoMarkdown.FrontMatter.slug
                    $urlSafeTitle = ($hugoMarkdown.FrontMatter.title -replace '[:\/\\*?"<>|#%.!,]', '-' -replace '\s+', '-').ToLower()
                    if ($urlSafeTitle -ne $slug) {
                        $aliases += "/$urlSafeTitle"
                        $404aliases += "/$urlSafeTitle"
                        $aliases += "/blog/$urlSafeTitle"
                        $404aliases += "/blog/$urlSafeTitle"
                    }
                }

            }
            "podcast" {
                # No type-specific handling.
            }
            "videos" {
                # No type-specific handling.
            }
            default {
                # No type-specific handling.
            }
        }
        # Add the ResourceId as an alias whenever the front matter carries one
        if ($hugoMarkdown.FrontMatter.Contains("ResourceId")) {
            $aliases += "/resources/$($hugoMarkdown.FrontMatter.ResourceId)"
        }
        Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'aliases' -values $aliases -addAfter 'slug'
        if ($404aliases -is [array] -and $404aliases.Count -gt 0) {
            Update-StringList -frontMatter $hugoMarkdown.FrontMatter -fieldName 'aliasesFor404' -values $404aliases -addAfter 'aliases'
        }

        Save-HugoMarkdown -hugoMarkdown $hugoMarkdown -Path $markdownFile
    }
    else {
        Write-Host "Skipping folder: $resourceDir (missing index.md)"
    }
}

Write-Host "All markdown files processed."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.