Skip to content

Commit c75b74b

Browse files
committed
add missing fetch script for the face dataset
1 parent 7703b95 commit c75b74b

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@ data/movie_reviews/poldata.README.2.0
1313
data/languages/paragraphs
1414
data/languages/short_paragraphs
1515
data/languages/html
16+
17+
data/labeled_faces_wild/lfw_preprocessed/

data/labeled_faces_wild/fetch_data.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
"""Simple script to fetch a numpy version of the LFW data

Original dataset and credits available at:

  http://vis-www.cs.umass.edu/lfw/

Running the script downloads the preprocessed archive (unless it is already
present), extracts it, and then deletes the archive. All paths are relative,
so files land in the current working directory. Nothing is done when the
``lfw_preprocessed`` folder already exists.
"""
import os
import shutil
import tarfile
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved ``urlopen`` to ``urllib.request``; keep the old name so
    # the rest of the script reads the same on both interpreters.
    import urllib.request as urllib2

URL = "https://downloads.sourceforge.net/project/scikit-learn/data/lfw_preprocessed.tar.gz"
ARCHIVE_NAME = "lfw_preprocessed.tar.gz"
FOLDER_NAME = "lfw_preprocessed"

if not os.path.exists(FOLDER_NAME):
    if not os.path.exists(ARCHIVE_NAME):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Downloading data, please Wait (58.8MB)...")
        print(URL)

        # Download to a temporary name and rename only once the transfer is
        # complete: an interrupted download must not leave behind a truncated
        # file that a later run would mistake for the finished archive.
        partial_name = ARCHIVE_NAME + ".part"
        with closing(urllib2.urlopen(URL)) as response:
            with open(partial_name, 'wb') as archive_file:
                # Stream in chunks instead of holding the whole archive in
                # memory.
                shutil.copyfileobj(response, archive_file)
        os.rename(partial_name, ARCHIVE_NAME)
        print("")

    print("Decompressing the archive: " + ARCHIVE_NAME)
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive. That is acceptable only as long as URL points at a trusted
    # host; consider validating member names (or using an extraction filter
    # on Python versions that provide one) if the source ever changes.
    with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive:
        archive.extractall()
    os.remove(ARCHIVE_NAME)

0 commit comments

Comments
 (0)