11package ia
22
33import (
4+ "encoding/json"
45 "fmt"
6+ "io/ioutil"
57 "net/http"
8+ "net/url"
69 "regexp"
710 "time"
811)
1922 host = "archive.org"
2023 dest = "https://web." + host
2124 base = "https://web.archive.org/save/"
25+
26+ endpoint = "https://archive.org/wayback/available"
2227)
2328
2429func (wbrc * Archiver ) fetch (url string , ch chan <- string ) {
@@ -62,10 +67,16 @@ func (wbrc *Archiver) fetch(url string, ch chan<- string) {
6267 return
6368 }
6469
70+ got := latest (url )
71+
6572 // HTTP 509 Bandwidth Limit Exceeded
6673 if resp .StatusCode == 509 {
67- // https://web.archive.org/*/https://example.org
68- ch <- fmt .Sprintf ("%s/*/%s" , dest , url )
74+ ch <- fmt .Sprint (got )
75+ return
76+ }
77+
78+ if resp .StatusCode != 200 {
79+ ch <- fmt .Sprint (got )
6980 return
7081 }
7182
@@ -82,3 +93,39 @@ func isURL(str string) bool {
8293 }
8394 return false
8495}
96+
97+ func latest (s string ) string {
98+ // https://web.archive.org/*/https://example.org
99+ u := fmt .Sprintf ("%s/*/%s" , dest , s )
100+
101+ if _ , err := url .Parse (s ); err != nil {
102+ return u
103+ }
104+
105+ endpoint += "?url=" + s
106+ resp , err := http .Get (endpoint )
107+ if err != nil {
108+ return u
109+ }
110+ defer resp .Body .Close ()
111+
112+ data , err := ioutil .ReadAll (resp .Body )
113+ if err != nil {
114+ return u
115+ }
116+
117+ var dat map [string ]interface {}
118+ if err := json .Unmarshal (data , & dat ); err != nil {
119+ return u
120+ }
121+
122+ if archived , ok := dat ["archived_snapshots" ].(map [string ]interface {}); ok {
123+ if closest , ok := archived ["closest" ].(map [string ]interface {}); ok {
124+ if closest ["available" ].(bool ) {
125+ return closest ["url" ].(string )
126+ }
127+ }
128+ }
129+
130+ return u
131+ }
0 commit comments