sungyubkim
diff --git a/‎.ipynb_checkpoints/Brief Pytorch Tutorials-checkpoint.ipynb
Lines changed: 338 additions & 0 deletions b/‎.ipynb_checkpoints/Brief Pytorch Tutorials-checkpoint.ipynb
Lines changed: 338 additions & 0 deletions
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "# 네트워크와 레이어들을 정리하기 위한 모듈\n",
+    "import torch.nn.functional as F\n",
+    "# 구조 상에서 표현하기 힘든 경우나 loss function을 바로 가져다 쓰고 싶은 경우\n",
+    "import torch.autograd as autograd\n",
+    "# backprop과정과 gradient를 직접 수정하고 싶은 경우"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. 네트워크 정의"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NN(\n",
+      "  (layers): Sequential(\n",
+      "    (0): Linear(in_features=25, out_features=128, bias=True)\n",
+      "    (1): Dropout(p=0.5)\n",
+      "    (2): ReLU()\n",
+      "    (3): Linear(in_features=128, out_features=128, bias=True)\n",
+      "    (4): ReLU()\n",
+      "    (5): Linear(in_features=128, out_features=10, bias=True)\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "class NN(nn.Module):\n",
+    "    # 네트워크는 nn.Module을 상속받아서 정의하는게 일반적입니다.\n",
+    "    def __init__(self, input_dim=25, hidden_dim=128, output_dim=10):\n",
+    "        super().__init__()\n",
+    "        self.layers = nn.Sequential(\n",
+    "            nn.Linear(input_dim, hidden_dim),\n",
+    "            # Dropout을 정의하고 싶으면 다음과 같이 할 수 있습니다.\n",
+    "            nn.Dropout(),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Linear(hidden_dim, hidden_dim),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Linear(hidden_dim, output_dim)\n",
+    "        )\n",
+    "        \n",
+    "        # 네트워크 파라미터 정의는 다음과 같이 할 수 있습니다.\n",
+    "        for m in self.modules():\n",
+    "#             print(m)\n",
+    "            if isinstance(m, nn.Linear):\n",
+    "                nn.init.xavier_normal_(m.weight)\n",
+    "                nn.init.constant_(m.bias, 0.0)\n",
+    "                \n",
+    "    def forward(self, x):\n",
+    "        # x 는 input을 의미하며, 일반적인 경우에는 (mini_batch, input_dim)\n",
+    "        # 이미지의 경우에는 (mini_batch, Channel, Height, Width) 의 형태를 가정합니다.\n",
+    "        x = self.layers(x)\n",
+    "        return x\n",
+    "net = NN()\n",
+    "print(net)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([32, 25]) torch.Size([32, 10])\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = torch.rand((32, 25))\n",
+    "y = net(x)\n",
+    "print(x.shape, y.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. 네트워크 셋팅"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "False\n",
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Dropout/BatchNorm 등의 layer는 다음과 같이 제어할 수 있습니다.\n",
+    "net.eval()\n",
+    "print(net.training)\n",
+    "net.train()\n",
+    "print(net.training)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Loss 정의하기"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(1.1736, grad_fn=<MseLossBackward>)\n",
+      "tensor(1.1736, grad_fn=<MseLossBackward>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = torch.randn_like(y)\n",
+    "\n",
+    "# 보통 loss는 다음 2가지의 형태로 정의합니다.\n",
+    "# 1. nn에서 함수 형태로 가져오는 경우\n",
+    "cost_func = nn.MSELoss()\n",
+    "loss = cost_func(y, target)\n",
+    "print(loss)\n",
+    "\n",
+    "# 2. functional에서 바로 부르는 경우\n",
+    "loss = F.mse_loss(y, target)\n",
+    "print(loss)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Gradient 조작"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([128, 25])\n",
+      "torch.Size([128])\n",
+      "torch.Size([128, 128])\n",
+      "torch.Size([128])\n",
+      "torch.Size([10, 128])\n",
+      "torch.Size([10])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 많은 경우 gradient계산은 다음과 같이 실행합니다.\n",
+    "loss.backward()\n",
+    "for param in net.parameters():\n",
+    "    print(param.grad.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([32, 25])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 한편 필요에 의해서는 명시적으로 gradient를 구해야할 필요가 있습니다. 이때는 다음과 같이 조절할 수 있습니다.\n",
+    "x = torch.rand((32, 25)).requires_grad_(True)\n",
+    "y = net(x)\n",
+    "grad = autograd.grad(y, x,\\\n",
+    "                     grad_outputs=torch.ones_like(y), retain_graph=None, create_graph=False,\\\n",
+    "                     only_inputs=True, allow_unused=False)[0]\n",
+    "print(grad.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Tensor 조작"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# numpy의 데이터들은 대부분 다음과 같이 pytorch의 tensor로 만들 수 있습니다.\n",
+    "import numpy as np\n",
+    "x = np.random.randn(32, 25)\n",
+    "x = torch.FloatTensor(x)\n",
+    "# 바뀐 tensor는 다음과 같이 GPU로 올라갈 수 있습니다.\n",
+    "# x = x.cuda()\n",
+    "# x.cuda_()\n",
+    "# 한편 네트워크를 GPU에 올릴 때는 cuda함수만 호출해도 충분합니다.\n",
+    "# net.cuda()\n",
+    "# GPU에 올라간 tensor는 다음 코드로 다시 numpy로 바꿀 수 있습니다.\n",
+    "# x = x.detach().cpu().numpy()\n",
+    "# print(x.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([32, 25])\n",
+      "torch.Size([32, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 한편 tensor에서의 indexing은 다음과 같이 vectorize할 수 있습니다.\n",
+    "indices = [np.random.choice(25) for _ in range(32)]\n",
+    "indices = torch.LongTensor(indices) # (32)\n",
+    "x_selected = x.gather(1, indices.unsqueeze(1))\n",
+    "print(x.shape)\n",
+    "print(x_selected.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([32, 25])\n",
+      "torch.Size([32, 1, 25])\n",
+      "torch.Size([32, 1, 25])\n",
+      "torch.Size([32, 25])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 위에서 unsqueeze/squeeze 함수는 데이터의 전체 크기는 유지하되 axis를 추가/제거하는 함수입니다.\n",
+    "x = torch.randn((32, 25))\n",
+    "print(x.shape)\n",
+    "x.unsqueeze_(1)\n",
+    "print(x.shape)\n",
+    "x.squeeze_(2)\n",
+    "print(x.shape)\n",
+    "x.squeeze_(1)\n",
+    "print(x.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([32, 1, 25])\n",
+      "torch.Size([800])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 비슷하게 view 함수를 사용할 수도 있지만, dim이 섞일 위험이 있으니 조심하세요!\n",
+    "x = x.view(x.size(0), 1, -1)\n",
+    "print(x.shape)\n",
+    "x = x.view(-1)\n",
+    "print(x.shape)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}