Skip to content

Commit ca67d16

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 0df96763da31377fe52aad7f6e9a6fcea74ccf61
1 parent 537b78e commit ca67d16

File tree

1,513 files changed

+5880
-5889
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,513 files changed

+5880
-5889
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/c08598f3ffe66017f7cad294026ee0b9/plot_out_of_core_classification.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"class ReutersParser(HTMLParser):\n \"\"\"Utility class to parse a SGML file and yield documents one at a time.\"\"\"\n\n def __init__(self, encoding=\"latin-1\"):\n HTMLParser.__init__(self)\n self._reset()\n self.encoding = encoding\n\n def handle_starttag(self, tag, attrs):\n method = \"start_\" + tag\n getattr(self, method, lambda x: None)(attrs)\n\n def handle_endtag(self, tag):\n method = \"end_\" + tag\n getattr(self, method, lambda: None)()\n\n def _reset(self):\n self.in_title = 0\n self.in_body = 0\n self.in_topics = 0\n self.in_topic_d = 0\n self.title = \"\"\n self.body = \"\"\n self.topics = []\n self.topic_d = \"\"\n\n def parse(self, fd):\n self.docs = []\n for chunk in fd:\n self.feed(chunk.decode(self.encoding))\n for doc in self.docs:\n yield doc\n self.docs = []\n self.close()\n\n def handle_data(self, data):\n if self.in_body:\n self.body += data\n elif self.in_title:\n self.title += data\n elif self.in_topic_d:\n self.topic_d += data\n\n def start_reuters(self, attributes):\n pass\n\n def end_reuters(self):\n self.body = re.sub(r\"\\s+\", r\" \", self.body)\n self.docs.append(\n {\"title\": self.title, \"body\": self.body, \"topics\": self.topics}\n )\n self._reset()\n\n def start_title(self, attributes):\n self.in_title = 1\n\n def end_title(self):\n self.in_title = 0\n\n def start_body(self, attributes):\n self.in_body = 1\n\n def end_body(self):\n self.in_body = 0\n\n def start_topics(self, attributes):\n self.in_topics = 1\n\n def end_topics(self):\n self.in_topics = 0\n\n def start_d(self, attributes):\n self.in_topic_d = 1\n\n def end_d(self):\n self.in_topic_d = 0\n self.topics.append(self.topic_d)\n self.topic_d = \"\"\n\n\ndef stream_reuters_documents(data_path=None):\n \"\"\"Iterate over documents of the Reuters dataset.\n\n The Reuters archive will automatically be downloaded and uncompressed if\n the `data_path` directory does not exist.\n\n Documents are represented as dictionaries with 'body' (str),\n 'title' (str), 'topics' (list(str)) keys.\n\n \"\"\"\n\n DOWNLOAD_URL = (\n \"http://archive.ics.uci.edu/ml/machine-learning-databases/\"\n \"reuters21578-mld/reuters21578.tar.gz\"\n )\n ARCHIVE_SHA256 = \"3bae43c9b14e387f76a61b6d82bf98a4fb5d3ef99ef7e7075ff2ccbcf59f9d30\"\n ARCHIVE_FILENAME = \"reuters21578.tar.gz\"\n\n if data_path is None:\n data_path = Path(get_data_home()) / \"reuters\"\n else:\n data_path = Path(data_path)\n if not data_path.exists():\n \"\"\"Download the dataset.\"\"\"\n print(\"downloading dataset (once and for all) into %s\" % data_path)\n data_path.mkdir(parents=True, exist_ok=True)\n\n def progress(blocknum, bs, size):\n total_sz_mb = \"%.2f MB\" % (size / 1e6)\n current_sz_mb = \"%.2f MB\" % ((blocknum * bs) / 1e6)\n if _not_in_sphinx():\n sys.stdout.write(\"\\rdownloaded %s / %s\" % (current_sz_mb, total_sz_mb))\n\n archive_path = data_path / ARCHIVE_FILENAME\n\n urlretrieve(DOWNLOAD_URL, filename=archive_path, reporthook=progress)\n if _not_in_sphinx():\n sys.stdout.write(\"\\r\")\n\n # Check that the archive was not tampered:\n assert sha256(archive_path.read_bytes()).hexdigest() == ARCHIVE_SHA256\n\n print(\"untarring Reuters dataset...\")\n with tarfile.open(archive_path, \"r:gz\") as fp:\n fp.extractall(data_path, filter=\"data\")\n print(\"done.\")\n\n parser = ReutersParser()\n for filename in data_path.glob(\"*.sgm\"):\n for doc in parser.parse(open(filename, \"rb\")):\n yield doc"
36+
"class ReutersParser(HTMLParser):\n \"\"\"Utility class to parse a SGML file and yield documents one at a time.\"\"\"\n\n def __init__(self, encoding=\"latin-1\"):\n HTMLParser.__init__(self)\n self._reset()\n self.encoding = encoding\n\n def handle_starttag(self, tag, attrs):\n method = \"start_\" + tag\n getattr(self, method, lambda x: None)(attrs)\n\n def handle_endtag(self, tag):\n method = \"end_\" + tag\n getattr(self, method, lambda: None)()\n\n def _reset(self):\n self.in_title = 0\n self.in_body = 0\n self.in_topics = 0\n self.in_topic_d = 0\n self.title = \"\"\n self.body = \"\"\n self.topics = []\n self.topic_d = \"\"\n\n def parse(self, fd):\n self.docs = []\n for chunk in fd:\n self.feed(chunk.decode(self.encoding))\n for doc in self.docs:\n yield doc\n self.docs = []\n self.close()\n\n def handle_data(self, data):\n if self.in_body:\n self.body += data\n elif self.in_title:\n self.title += data\n elif self.in_topic_d:\n self.topic_d += data\n\n def start_reuters(self, attributes):\n pass\n\n def end_reuters(self):\n self.body = re.sub(r\"\\s+\", r\" \", self.body)\n self.docs.append(\n {\"title\": self.title, \"body\": self.body, \"topics\": self.topics}\n )\n self._reset()\n\n def start_title(self, attributes):\n self.in_title = 1\n\n def end_title(self):\n self.in_title = 0\n\n def start_body(self, attributes):\n self.in_body = 1\n\n def end_body(self):\n self.in_body = 0\n\n def start_topics(self, attributes):\n self.in_topics = 1\n\n def end_topics(self):\n self.in_topics = 0\n\n def start_d(self, attributes):\n self.in_topic_d = 1\n\n def end_d(self):\n self.in_topic_d = 0\n self.topics.append(self.topic_d)\n self.topic_d = \"\"\n\n\ndef stream_reuters_documents(data_path=None):\n \"\"\"Iterate over documents of the Reuters dataset.\n\n The Reuters archive will automatically be downloaded and uncompressed if\n the `data_path` directory does not exist.\n\n Documents are represented as dictionaries with 'body' (str),\n 'title' (str), 'topics' (list(str)) keys.\n\n \"\"\"\n\n DOWNLOAD_URL = \"https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz\"\n ARCHIVE_SHA256 = \"3bae43c9b14e387f76a61b6d82bf98a4fb5d3ef99ef7e7075ff2ccbcf59f9d30\"\n ARCHIVE_FILENAME = \"reuters21578.tar.gz\"\n\n if data_path is None:\n data_path = Path(get_data_home()) / \"reuters\"\n else:\n data_path = Path(data_path)\n if not data_path.exists():\n \"\"\"Download the dataset.\"\"\"\n print(\"downloading dataset (once and for all) into %s\" % data_path)\n data_path.mkdir(parents=True, exist_ok=True)\n\n def progress(blocknum, bs, size):\n total_sz_mb = \"%.2f MB\" % (size / 1e6)\n current_sz_mb = \"%.2f MB\" % ((blocknum * bs) / 1e6)\n if _not_in_sphinx():\n sys.stdout.write(\"\\rdownloaded %s / %s\" % (current_sz_mb, total_sz_mb))\n\n archive_path = data_path / ARCHIVE_FILENAME\n\n urlretrieve(DOWNLOAD_URL, filename=archive_path, reporthook=progress)\n if _not_in_sphinx():\n sys.stdout.write(\"\\r\")\n\n # Check that the archive was not tampered:\n assert sha256(archive_path.read_bytes()).hexdigest() == ARCHIVE_SHA256\n\n print(\"untarring Reuters dataset...\")\n with tarfile.open(archive_path, \"r:gz\") as fp:\n fp.extractall(data_path, filter=\"data\")\n print(\"done.\")\n\n parser = ReutersParser()\n for filename in data_path.glob(\"*.sgm\"):\n for doc in parser.parse(open(filename, \"rb\")):\n yield doc"
3737
]
3838
},
3939
{
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/f7c999465d2f8d68e0c04bec778aa48e/plot_out_of_core_classification.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,7 @@ def stream_reuters_documents(data_path=None):
142142
143143
"""
144144

145-
DOWNLOAD_URL = (
146-
"http://archive.ics.uci.edu/ml/machine-learning-databases/"
147-
"reuters21578-mld/reuters21578.tar.gz"
148-
)
145+
DOWNLOAD_URL = "https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz"
149146
ARCHIVE_SHA256 = "3bae43c9b14e387f76a61b6d82bf98a4fb5d3ef99ef7e7075ff2ccbcf59f9d30"
150147
ARCHIVE_FILENAME = "reuters21578.tar.gz"
151148

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

dev/_downloads/scikit-learn-docs.zip

1.71 KB
Binary file not shown.
-214 Bytes
-75 Bytes
-45 Bytes

0 commit comments

Comments
 (0)