-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathPageQueueTrigger.cs
61 lines (54 loc) · 2.19 KB
/
PageQueueTrigger.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
using System;
using System.IO;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using System.Net.Http;
using Newtonsoft.Json;
using HtmlAgilityPack;
using System.Web;
using Microsoft.Azure.Storage.Blob;
using System.Collections.Generic;
namespace HvidevoldDevelopmentENK.GetPixelArt
{
/// <summary>
/// Queue-triggered function that loads one search-result page, collects the distinct
/// links whose <c>href</c> starts with <c>/content/</c>, and adds each of them to the
/// <c>contentqueue</c> output collector.
/// </summary>
public static class PageQueueTrigger
{
    // Single shared instance, reused by every invocation instead of creating one per call.
    static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Processes one message from <c>pagequeue</c>.
    /// </summary>
    /// <param name="page">
    /// Queue message text. It is appended to <c>Common.SearchURI</c> as the <c>page</c>
    /// query value and, through the <c>{queueTrigger}</c> binding expression, is part of
    /// the blob path.
    /// </param>
    /// <param name="blob">
    /// Blob bound to <c>opengameart/pages/page{queueTrigger}.html</c>; handed to
    /// <c>Common.ReadURIOrCache</c> (presumably used as a cache for the page body;
    /// confirm against that helper).
    /// </param>
    /// <param name="msg">Output collector for <c>contentqueue</c>; receives one entry per distinct link.</param>
    /// <param name="log">Function logger.</param>
    /// <returns>A task that completes when the page has been processed.</returns>
    [FunctionName("PageQueueTrigger")]
    public static async Task Run(
        [QueueTrigger("pagequeue", Connection = "AzureWebJobsStorage")] string page,
        [Blob("opengameart/pages/page{queueTrigger}.html")] CloudBlockBlob blob,
        [Queue("contentqueue"), StorageAccount("AzureWebJobsStorage")] ICollector<string> msg,
        ILogger log)
    {
        log.LogInformation($"C# PageQueueTrigger function processed page {page}");
        try
        {
            string responseBody = await Common.ReadURIOrCache(blob, Common.SearchURI + "&page=" + page, client);

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(responseBody);

            // SelectSingleNode returns null when nothing matches (e.g. an HTML fragment
            // or error page without a <body>); scan the whole document in that case
            // instead of failing with a NullReferenceException.
            var searchRoot = htmlDoc.DocumentNode.SelectSingleNode("//body") ?? htmlDoc.DocumentNode;

            // A set, because the same link can appear several times on one page.
            var contentLinks = new HashSet<string>();
            foreach (var anchor in searchRoot.Descendants("a"))
            {
                if (anchor.NodeType != HtmlNodeType.Element)
                {
                    continue;
                }

                // Read the attribute once; anchors without an href (or with a null value) are skipped.
                string href = anchor.Attributes["href"]?.Value;

                // Ordinal comparison: this is a URL path prefix, not linguistic text.
                if (href != null && href.StartsWith("/content/", StringComparison.Ordinal))
                {
                    contentLinks.Add(HttpUtility.HtmlDecode(href));
                }
            }

            foreach (var urlPart in contentLinks)
            {
                msg.Add(urlPart);
            }
        }
        catch (HttpRequestException e)
        {
            // Best effort, as before: the failure is logged and not rethrown, so the
            // invocation finishes without an error for this page.
            log.LogError("\nException Caught!");
            // Pass the exception itself so the stack trace is recorded with the message.
            log.LogError(e, "Message :{0} ", e.Message);
        }
    }
}
}