@@ -117,17 +117,52 @@ public class ExtractorYoutubeDL extends Extractor
117
117
// unnamed toethread-local temporary file
118
118
protected transient ThreadLocal <RandomAccessFile > tempfile = new ThreadLocal <RandomAccessFile >() {
119
119
protected RandomAccessFile initialValue () {
120
- File t ;
121
- try {
122
- t = File .createTempFile ("ydl" , ".json" );
123
- RandomAccessFile f = new RandomAccessFile (t , "rw" );
124
- t .delete ();
125
- return f ;
126
- } catch (IOException e ) {
127
- throw new RuntimeException (e );
128
- }
120
+ return null ;
129
121
}
130
122
};
123
+ protected void closeLocalTempFile () {
124
+ RandomAccessFile localTemp = tempfile .get ();
125
+ if (localTemp == null || !isOpen (localTemp ))
126
+ return ; // avoid making a new temp file just to close it immediately
127
+ try {
128
+ getLocalTempFile ().close ();
129
+ tempfile .set (null );
130
+ }
131
+ catch (Exception e ) {
132
+ logger .log (Level .WARNING , "problem closing ydl temp file " + e );
133
+ }
134
+ }
135
+ protected RandomAccessFile getLocalTempFile () {
136
+ RandomAccessFile localTemp = tempfile .get ();
137
+ if (localTemp == null || !isOpen (localTemp )) {
138
+ localTemp = openNewTempFile ();
139
+ tempfile .set (localTemp );
140
+ }
141
+ logger .info ("Getting youtube-dl temp file " );
142
+ return localTemp ;
143
+ }
144
+ protected boolean isOpen (RandomAccessFile f ) {
145
+ try {
146
+ f .length ();
147
+ return true ;
148
+ }
149
+ catch (IOException e ) {
150
+ logger .info ("youtube-dl temp file is not open" );
151
+ return false ;
152
+ }
153
+ }
154
+ protected RandomAccessFile openNewTempFile () {
155
+ logger .info ("Opening New youtube-dl temp file " );
156
+ File t ;
157
+ try {
158
+ t = File .createTempFile ("ydl" , ".json" );
159
+ RandomAccessFile f = new RandomAccessFile (t , "rw" );
160
+ t .delete ();
161
+ return f ;
162
+ } catch (IOException e ) {
163
+ throw new RuntimeException (e );
164
+ }
165
+ }
131
166
132
167
protected CrawlerLoggerModule crawlerLoggerModule ;
133
168
public CrawlerLoggerModule getCrawlerLoggerModule () {
@@ -447,7 +482,7 @@ public String call() throws IOException {
447
482
}
448
483
});
449
484
450
- YoutubeDLResults results = new YoutubeDLResults (tempfile . get ());
485
+ YoutubeDLResults results = new YoutubeDLResults (getLocalTempFile ());
451
486
452
487
try {
453
488
try {
@@ -525,7 +560,14 @@ public boolean shouldBuildRecord(CrawlURI uri) {
525
560
// should build record for containing page, which has an
526
561
// annotation like "youtube-dl:3" (no slash)
527
562
String annotation = findYdlAnnotation (uri );
528
- return annotation != null && !annotation .contains ("/" );
563
+ boolean shouldBuild = (annotation != null && !annotation .contains ("/" ));
564
+
565
+ // If we processed this uri, then we have an open temp file that won't get closed
566
+ // for us by the warc writer
567
+ if (!shouldBuild )
568
+ closeLocalTempFile ();
569
+
570
+ return shouldBuild ;
529
571
}
530
572
531
573
@ Override
@@ -546,10 +588,10 @@ public WARCRecordInfo buildRecord(CrawlURI curi, URI concurrentTo)
546
588
recordInfo .setMimetype ("application/vnd.youtube-dl_formats+json;charset=utf-8" );
547
589
recordInfo .setEnforceLength (true );
548
590
549
- tempfile . get ().seek (0 );
550
- InputStream inputStream = Channels .newInputStream (tempfile . get ().getChannel ());
591
+ getLocalTempFile ().seek (0 );
592
+ InputStream inputStream = Channels .newInputStream (getLocalTempFile ().getChannel ());
551
593
recordInfo .setContentStream (inputStream );
552
- recordInfo .setContentLength (tempfile . get ().length ());
594
+ recordInfo .setContentLength (getLocalTempFile ().length ());
553
595
554
596
logger .info ("built record timestamp=" + timestamp + " url=" + recordInfo .getUrl ());
555
597
@@ -575,7 +617,7 @@ public static void main(String[] args) throws IOException {
575
617
ExtractorYoutubeDL e = new ExtractorYoutubeDL ();
576
618
577
619
FileInputStream in = new FileInputStream ("/tmp/ydl-single-video.json" );
578
- YoutubeDLResults results = new YoutubeDLResults (e .tempfile . get ());
620
+ YoutubeDLResults results = new YoutubeDLResults (e .getLocalTempFile ());
579
621
e .streamYdlOutput (in , results );
580
622
System .out .println ("video urls: " + results .videoUrls );
581
623
System .out .println ("page urls: " + results .pageUrls );
@@ -591,7 +633,7 @@ public static void main(String[] args) throws IOException {
591
633
}
592
634
593
635
in = new FileInputStream ("/tmp/ydl-uncgreensboro-limited.json" );
594
- results = new YoutubeDLResults (e .tempfile . get ());
636
+ results = new YoutubeDLResults (e .getLocalTempFile ());
595
637
e .streamYdlOutput (in , results );
596
638
System .out .println ("video urls: " + results .videoUrls );
597
639
System .out .println ("page urls: " + results .pageUrls );
0 commit comments