codingpot · kkweon · Jun 25, 2022
diff --git a/model/ver20220624/model.json b/model/ver20220624/model.json
@@ -0,0 +1,199 @@
+{
+  "modelTopology": {
+    "class_name": "Sequential",
+    "config": {
+      "name": "sequential_1",
+      "layers": [
+        {
+          "class_name": "Dense",
+          "config": {
+            "units": 32,
+            "activation": "relu",
+            "use_bias": true,
+            "kernel_initializer": {
+              "class_name": "VarianceScaling",
+              "config": {
+                "scale": 1,
+                "mode": "fan_avg",
+                "distribution": "normal",
+                "seed": null
+              }
+            },
+            "bias_initializer": { "class_name": "Zeros", "config": {} },
+            "kernel_regularizer": null,
+            "bias_regularizer": null,
+            "activity_regularizer": null,
+            "kernel_constraint": null,
+            "bias_constraint": null,
+            "name": "dense_Dense1",
+            "trainable": true,
+            "batch_input_shape": [null, 512],
+            "dtype": "float32"
+          }
+        },
+        {
+          "class_name": "BatchNormalization",
+          "config": {
+            "axis": -1,
+            "momentum": 0.99,
+            "epsilon": 0.001,
+            "center": true,
+            "scale": true,
+            "beta_initializer": { "class_name": "Zeros", "config": {} },
+            "gamma_initializer": { "class_name": "Ones", "config": {} },
+            "moving_mean_initializer": { "class_name": "Zeros", "config": {} },
+            "moving_variance_initializer": {
+              "class_name": "Ones",
+              "config": {}
+            },
+            "beta_regularizer": null,
+            "gamma_regularizer": null,
+            "beta_constraint": null,
+            "gamma_constraint": null,
+            "name": "batch_normalization_BatchNormalization1",
+            "trainable": true
+          }
+        },
+        {
+          "class_name": "Dense",
+          "config": {
+            "units": 32,
+            "activation": "relu",
+            "use_bias": true,
+            "kernel_initializer": {
+              "class_name": "VarianceScaling",
+              "config": {
+                "scale": 1,
+                "mode": "fan_avg",
+                "distribution": "normal",
+                "seed": null
+              }
+            },
+            "bias_initializer": { "class_name": "Zeros", "config": {} },
+            "kernel_regularizer": null,
+            "bias_regularizer": null,
+            "activity_regularizer": null,
+            "kernel_constraint": null,
+            "bias_constraint": null,
+            "name": "dense_Dense2",
+            "trainable": true
+          }
+        },
+        {
+          "class_name": "BatchNormalization",
+          "config": {
+            "axis": -1,
+            "momentum": 0.99,
+            "epsilon": 0.001,
+            "center": true,
+            "scale": true,
+            "beta_initializer": { "class_name": "Zeros", "config": {} },
+            "gamma_initializer": { "class_name": "Ones", "config": {} },
+            "moving_mean_initializer": { "class_name": "Zeros", "config": {} },
+            "moving_variance_initializer": {
+              "class_name": "Ones",
+              "config": {}
+            },
+            "beta_regularizer": null,
+            "gamma_regularizer": null,
+            "beta_constraint": null,
+            "gamma_constraint": null,
+            "name": "batch_normalization_BatchNormalization2",
+            "trainable": true
+          }
+        },
+        {
+          "class_name": "Dense",
+          "config": {
+            "units": 1,
+            "activation": "sigmoid",
+            "use_bias": true,
+            "kernel_initializer": {
+              "class_name": "VarianceScaling",
+              "config": {
+                "scale": 1,
+                "mode": "fan_avg",
+                "distribution": "normal",
+                "seed": null
+              }
+            },
+            "bias_initializer": { "class_name": "Zeros", "config": {} },
+            "kernel_regularizer": null,
+            "bias_regularizer": null,
+            "activity_regularizer": null,
+            "kernel_constraint": null,
+            "bias_constraint": null,
+            "name": "dense_Dense3",
+            "trainable": true
+          }
+        }
+      ]
+    },
+    "keras_version": "tfjs-layers 3.18.0",
+    "backend": "tensor_flow.js"
+  },
+  "weightsManifest": [
+    {
+      "paths": ["weights.bin"],
+      "weights": [
+        {
+          "name": "dense_Dense1/kernel",
+          "shape": [512, 32],
+          "dtype": "float32"
+        },
+        { "name": "dense_Dense1/bias", "shape": [32], "dtype": "float32" },
+        {
+          "name": "batch_normalization_BatchNormalization1/gamma",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "batch_normalization_BatchNormalization1/beta",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "dense_Dense2/kernel",
+          "shape": [32, 32],
+          "dtype": "float32"
+        },
+        { "name": "dense_Dense2/bias", "shape": [32], "dtype": "float32" },
+        {
+          "name": "batch_normalization_BatchNormalization2/gamma",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "batch_normalization_BatchNormalization2/beta",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        { "name": "dense_Dense3/kernel", "shape": [32, 1], "dtype": "float32" },
+        { "name": "dense_Dense3/bias", "shape": [1], "dtype": "float32" },
+        {
+          "name": "batch_normalization_BatchNormalization1/moving_mean",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "batch_normalization_BatchNormalization1/moving_variance",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "batch_normalization_BatchNormalization2/moving_mean",
+          "shape": [32],
+          "dtype": "float32"
+        },
+        {
+          "name": "batch_normalization_BatchNormalization2/moving_variance",
+          "shape": [32],
+          "dtype": "float32"
+        }
+      ]
+    }
+  ],
+  "format": "layers-model",
+  "generatedBy": "TensorFlow.js tfjs-layers v3.18.0",
+  "convertedBy": null
+}
diff --git a/model/ver20220624/weights.bin b/model/ver20220624/weights.bin
diff --git a/trainer/datasets.ts b/trainer/datasets.ts
@@ -0,0 +1,76 @@
+import * as use from '@tensorflow-models/universal-sentence-encoder'
+import * as tf from '@tensorflow/tfjs-node'
+
+/**
+ * Builds a `tf.data.Dataset` from the Smilegate "unsmile" dataset in which
+ * every sentence has been embedded with the universal-sentence-encoder.
+ *
+ * The file is read as a tab-separated CSV with a header row; only the `clean`
+ * column (int32 label) and the `문장` column (sentence string) are kept. Each
+ * row becomes `{ xs, ys }`, where `xs` is the flattened embedding of the
+ * sentence and `ys` holds the label values. Rows are grouped into batches of
+ * 32, and the result is then shuffled with a buffer size of 32.
+ *
+ * NOTE(review): `shuffle` runs after `batch`, so it appears to reorder whole
+ * batches rather than individual rows — confirm this is intended.
+ *
+ * @param filepath URL of the dataset file. @see getUnsmileDataUrl
+ * @param encoder use.UniversalSentenceEncoder used to embed each sentence
+ * @returns the batched and shuffled dataset
+ * @link https://github.com/smilegate-ai/korean_unsmile_dataset
+ */
+async function loadUnsmileData({
+  filepath,
+  encoder,
+}: {
+  filepath: string
+  encoder: use.UniversalSentenceEncoder
+}): Promise<tf.data.Dataset<tf.TensorContainer>> {
+  return tf.data
+    .csv(filepath, {
+      delimiter: '\t',
+      hasHeader: true,
+      configuredColumnsOnly: true,
+      columnConfigs: {
+        clean: {
+          dtype: 'int32',
+          isLabel: true,
+        },
+        문장: {
+          dtype: 'string',
+        },
+      },
+    })
+    .mapAsync(async (data: any) => {
+      const embedding = await encoder.embed(data.xs['문장'])
+      try {
+        return {
+          xs: embedding.flatten(),
+          ys: Object.values(data.ys),
+        }
+      } finally {
+        // `flatten()` hands back a new tensor, so the intermediate embedding
+        // must be released explicitly or one tensor leaks per dataset row.
+        embedding.dispose()
+      }
+    })
+    .batch(32)
+    .shuffle(32)
+}
+
+/**
+ * Loads the Smilegate unsmile dataset, embedded with the
+ * universal-sentence-encoder, as a pair of `tf.data.Dataset`s: one for
+ * training and one for validation.
+ *
+ * @param encoder use.UniversalSentenceEncoder used to embed the sentences
+ * @returns the training dataset (`trainData`) and validation dataset (`valData`)
+ */
+export async function loadUnsmileTrainValidData(
+  encoder: use.UniversalSentenceEncoder,
+): Promise<{
+  trainData: tf.data.Dataset<tf.TensorContainer>
+  valData: tf.data.Dataset<tf.TensorContainer>
+}> {
+  const datasetVersion = 'v1.0'
+
+  // Both splits are loaded the same way; only the split name differs.
+  const loadSplit = (split: string) =>
+    loadUnsmileData({
+      filepath: getUnsmileDataUrl(split, datasetVersion),
+      encoder,
+    })
+
+  const trainData = await loadSplit('train')
+  const valData = await loadSplit('valid')
+
+  return { trainData, valData }
+}
+
+/**
+ * Helper that builds the raw GitHub URL of a Smilegate unsmile dataset file.
+ *
+ * @param type dataset split: "train" or "valid"
+ * @param version dataset version, e.g. "v1.0"
+ * @returns full URL of the dataset file
+ */
+function getUnsmileDataUrl(type: string, version: string): string {
+  const repositoryRoot =
+    'https://raw.githubusercontent.com/smilegate-ai/korean_unsmile_dataset/main'
+  const fileName = `unsmile_${type}_${version}.tsv`
+  return `${repositoryRoot}/${fileName}`
+}
diff --git a/trainer/model.ts b/trainer/model.ts
@@ -0,0 +1,44 @@
+import * as tf from '@tensorflow/tfjs-node'
+import path from 'path'
+
+const FILE_SCHEME = 'file://'
+
+/**
+ * Builds the fallback classifier used when no saved model can be loaded:
+ * a 512-dimensional input followed by two 32-unit ReLU dense layers (each
+ * followed by batch normalization) and a single sigmoid output unit.
+ */
+function createFreshModel(): tf.Sequential {
+  const inputLayer = tf.layers.dense({
+    inputDim: 512,
+    units: 32,
+    activation: 'relu',
+  })
+  const inputNorm = tf.layers.batchNormalization()
+  const hiddenLayer = tf.layers.dense({
+    units: 32,
+    activation: 'relu',
+  })
+  const hiddenNorm = tf.layers.batchNormalization()
+  const outputLayer = tf.layers.dense({
+    units: 1,
+    activation: 'sigmoid',
+  })
+
+  return tf.sequential({
+    layers: [inputLayer, inputNorm, hiddenLayer, hiddenNorm, outputLayer],
+  })
+}
+
+/**
+ * Loads a saved model, or creates a new model when loading fails.
+ *
+ * @param modelDirectoryPath path of the directory holding the saved model. For the input format see https://www.tensorflow.org/js/guide/save_load
+ * @returns the model to train
+ */
+export async function getModel(
+  modelDirectoryPath: string,
+): Promise<tf.LayersModel | tf.Sequential> {
+  try {
+    // Strip the scheme (if present) before joining, then put it back in front.
+    const directory = modelDirectoryPath.replace(FILE_SCHEME, '')
+    const modelPath = FILE_SCHEME + path.join(directory, 'model.json')
+    console.info(`Trying to load a model from ${modelPath}`)
+    return await tf.loadLayersModel(modelPath)
+  } catch (e) {
+    console.warn(e)
+    console.warn(`Unable to load a model. Creating a new model`)
+    return createFreshModel()
+  }
+}
diff --git a/trainer/package.json b/trainer/package.json
@@ -4,7 +4,7 @@
   "description": "",
   "main": "index.js",
   "scripts": {
-    "build": "npx ts-node trainer.ts"
+    "start": "ts-node trainer.ts"
   },
   "keywords": [],
   "author": "",