Skip to content

feat: add a base model to the repository #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: feat/workspace
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions model/ver20220624/model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
{
"modelTopology": {
"class_name": "Sequential",
"config": {
"name": "sequential_1",
"layers": [
{
"class_name": "Dense",
"config": {
"units": 32,
"activation": "relu",
"use_bias": true,
"kernel_initializer": {
"class_name": "VarianceScaling",
"config": {
"scale": 1,
"mode": "fan_avg",
"distribution": "normal",
"seed": null
}
},
"bias_initializer": { "class_name": "Zeros", "config": {} },
"kernel_regularizer": null,
"bias_regularizer": null,
"activity_regularizer": null,
"kernel_constraint": null,
"bias_constraint": null,
"name": "dense_Dense1",
"trainable": true,
"batch_input_shape": [null, 512],
"dtype": "float32"
}
},
{
"class_name": "BatchNormalization",
"config": {
"axis": -1,
"momentum": 0.99,
"epsilon": 0.001,
"center": true,
"scale": true,
"beta_initializer": { "class_name": "Zeros", "config": {} },
"gamma_initializer": { "class_name": "Ones", "config": {} },
"moving_mean_initializer": { "class_name": "Zeros", "config": {} },
"moving_variance_initializer": {
"class_name": "Ones",
"config": {}
},
"beta_regularizer": null,
"gamma_regularizer": null,
"beta_constraint": null,
"gamma_constraint": null,
"name": "batch_normalization_BatchNormalization1",
"trainable": true
}
},
{
"class_name": "Dense",
"config": {
"units": 32,
"activation": "relu",
"use_bias": true,
"kernel_initializer": {
"class_name": "VarianceScaling",
"config": {
"scale": 1,
"mode": "fan_avg",
"distribution": "normal",
"seed": null
}
},
"bias_initializer": { "class_name": "Zeros", "config": {} },
"kernel_regularizer": null,
"bias_regularizer": null,
"activity_regularizer": null,
"kernel_constraint": null,
"bias_constraint": null,
"name": "dense_Dense2",
"trainable": true
}
},
{
"class_name": "BatchNormalization",
"config": {
"axis": -1,
"momentum": 0.99,
"epsilon": 0.001,
"center": true,
"scale": true,
"beta_initializer": { "class_name": "Zeros", "config": {} },
"gamma_initializer": { "class_name": "Ones", "config": {} },
"moving_mean_initializer": { "class_name": "Zeros", "config": {} },
"moving_variance_initializer": {
"class_name": "Ones",
"config": {}
},
"beta_regularizer": null,
"gamma_regularizer": null,
"beta_constraint": null,
"gamma_constraint": null,
"name": "batch_normalization_BatchNormalization2",
"trainable": true
}
},
{
"class_name": "Dense",
"config": {
"units": 1,
"activation": "sigmoid",
"use_bias": true,
"kernel_initializer": {
"class_name": "VarianceScaling",
"config": {
"scale": 1,
"mode": "fan_avg",
"distribution": "normal",
"seed": null
}
},
"bias_initializer": { "class_name": "Zeros", "config": {} },
"kernel_regularizer": null,
"bias_regularizer": null,
"activity_regularizer": null,
"kernel_constraint": null,
"bias_constraint": null,
"name": "dense_Dense3",
"trainable": true
}
}
]
},
"keras_version": "tfjs-layers 3.18.0",
"backend": "tensor_flow.js"
},
"weightsManifest": [
{
"paths": ["weights.bin"],
"weights": [
{
"name": "dense_Dense1/kernel",
"shape": [512, 32],
"dtype": "float32"
},
{ "name": "dense_Dense1/bias", "shape": [32], "dtype": "float32" },
{
"name": "batch_normalization_BatchNormalization1/gamma",
"shape": [32],
"dtype": "float32"
},
{
"name": "batch_normalization_BatchNormalization1/beta",
"shape": [32],
"dtype": "float32"
},
{
"name": "dense_Dense2/kernel",
"shape": [32, 32],
"dtype": "float32"
},
{ "name": "dense_Dense2/bias", "shape": [32], "dtype": "float32" },
{
"name": "batch_normalization_BatchNormalization2/gamma",
"shape": [32],
"dtype": "float32"
},
{
"name": "batch_normalization_BatchNormalization2/beta",
"shape": [32],
"dtype": "float32"
},
{ "name": "dense_Dense3/kernel", "shape": [32, 1], "dtype": "float32" },
{ "name": "dense_Dense3/bias", "shape": [1], "dtype": "float32" },
{
"name": "batch_normalization_BatchNormalization1/moving_mean",
"shape": [32],
"dtype": "float32"
},
{
"name": "batch_normalization_BatchNormalization1/moving_variance",
"shape": [32],
"dtype": "float32"
},
{
"name": "batch_normalization_BatchNormalization2/moving_mean",
"shape": [32],
"dtype": "float32"
},
{
"name": "batch_normalization_BatchNormalization2/moving_variance",
"shape": [32],
"dtype": "float32"
}
]
}
],
"format": "layers-model",
"generatedBy": "TensorFlow.js tfjs-layers v3.18.0",
"convertedBy": null
}
Binary file added model/ver20220624/weights.bin
Binary file not shown.
76 changes: 76 additions & 0 deletions trainer/datasets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import * as use from '@tensorflow-models/universal-sentence-encoder'
import * as tf from '@tensorflow/tfjs-node'

/**
 * Builds a `tf.data.Dataset` from a Smilegate "unsmile" TSV file in which each
 * sentence has been embedded with the universal-sentence-encoder.
 *
 * Only two columns are read from the file: `문장` (the sentence, used as the
 * feature) and `clean` (an int32 column, used as the label). Each row is
 * embedded individually, shuffled at the element level and then grouped into
 * batches, so the composition of every batch differs between passes over the
 * data instead of only the order of fixed batches changing.
 *
 * @param filepath URL of the TSV file, see `getUnsmileDataUrl`.
 * @param encoder encoder used to turn each sentence string into an embedding.
 * @param batchSize number of rows per emitted batch. Defaults to 32.
 * @param shuffleBufferSize number of individual rows held in the shuffle
 *   buffer. Defaults to 1024 (32 batches of 32 rows).
 * @returns a dataset of `{ xs, ys }` batches.
 * @see https://github.com/smilegate-ai/korean_unsmile_dataset
 */
async function loadUnsmileData({
  filepath,
  encoder,
  batchSize = 32,
  shuffleBufferSize = 1024,
}: {
  filepath: string
  encoder: use.UniversalSentenceEncoder
  batchSize?: number
  shuffleBufferSize?: number
}): Promise<tf.data.Dataset<tf.TensorContainer>> {
  return (
    tf.data
      .csv(filepath, {
        delimiter: '\t',
        hasHeader: true,
        configuredColumnsOnly: true,
        columnConfigs: {
          clean: {
            dtype: 'int32',
            isLabel: true,
          },
          문장: {
            dtype: 'string',
          },
        },
      })
      // The CSV element type is the broad `tf.TensorContainer`; `any` is kept
      // here only to reach the `xs` / `ys` records produced by `isLabel`.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      .mapAsync(async (data: any) => {
        const embedded = await encoder.embed(data.xs['문장'])
        const xs = embedded.flatten()
        // `flatten()` returns a new tensor, so the intermediate embedding has
        // to be released explicitly or one tensor leaks per CSV row.
        embedded.dispose()
        return {
          xs,
          ys: Object.values(data.ys),
        }
      })
      // Shuffle individual rows BEFORE batching; shuffling after batching only
      // reorders batches whose contents never change.
      .shuffle(shuffleBufferSize)
      .batch(batchSize)
  )
}

/**
 * Loads the v1.0 training and validation splits of the Smilegate "unsmile"
 * dataset, each encoded through the universal-sentence-encoder by
 * `loadUnsmileData`.
 *
 * @param encoder encoder forwarded to `loadUnsmileData` for both splits.
 * @returns the training dataset as `trainData` and the validation dataset as
 *   `valData`.
 */
export async function loadUnsmileTrainValidData(
  encoder: use.UniversalSentenceEncoder,
): Promise<{
  trainData: tf.data.Dataset<tf.TensorContainer>
  valData: tf.data.Dataset<tf.TensorContainer>
}> {
  // Both splits share the same dataset version and encoder; only the split
  // name in the URL differs.
  const loadSplit = (
    split: string,
  ): Promise<tf.data.Dataset<tf.TensorContainer>> =>
    loadUnsmileData({ filepath: getUnsmileDataUrl(split, 'v1.0'), encoder })

  const trainData = await loadSplit('train')
  const valData = await loadSplit('valid')
  return { trainData, valData }
}

/**
 * Helper that builds the raw GitHub URL of a Smilegate "unsmile" TSV file.
 *
 * @param type split name, e.g. "train" or "valid"
 * @param version dataset version, e.g. "v1.0"
 * @returns the full URL of `unsmile_<type>_<version>.tsv`
 */
function getUnsmileDataUrl(type: string, version: string): string {
  const repositoryRoot =
    'https://raw.githubusercontent.com/smilegate-ai/korean_unsmile_dataset/main'
  const fileName = 'unsmile_' + type + '_' + version + '.tsv'
  return repositoryRoot + '/' + fileName
}
44 changes: 44 additions & 0 deletions trainer/model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import * as tf from '@tensorflow/tfjs-node'
import path from 'path'

// URL scheme that getModel prepends to the model path handed to tf.loadLayersModel.
const FILE_SCHEME = 'file://'

/**
 * Creates the fallback network used when no saved model can be loaded:
 * 512-dim input -> Dense(32, relu) -> BatchNormalization -> Dense(32, relu)
 * -> BatchNormalization -> Dense(1, sigmoid).
 *
 * @returns a new sequential model; no `compile()` call is made here.
 */
function buildUntrainedModel(): tf.Sequential {
  const layers = [
    tf.layers.dense({ inputDim: 512, units: 32, activation: 'relu' }),
    tf.layers.batchNormalization(),
    tf.layers.dense({ units: 32, activation: 'relu' }),
    tf.layers.batchNormalization(),
    tf.layers.dense({ units: 1, activation: 'sigmoid' }),
  ]
  return tf.sequential({ layers })
}

/**
 * Loads the model stored as `model.json` inside the given directory, or
 * creates a new model when loading fails for any reason.
 *
 * @param modelDirectoryPath directory of the saved model, with or without a
 *   leading `file://`. For the accepted formats see
 *   https://www.tensorflow.org/js/guide/save_load
 * @returns the loaded model, or a freshly created one if loading threw.
 */
export async function getModel(
  modelDirectoryPath: string,
): Promise<tf.LayersModel | tf.Sequential> {
  try {
    // Strip the scheme before joining so it is not mangled by path.join, then
    // put it back in front of the joined path.
    const directory = modelDirectoryPath.replace(FILE_SCHEME, '')
    const modelUrl = `${FILE_SCHEME}${path.join(directory, 'model.json')}`
    console.info(`Trying to load a model from ${modelUrl}`)
    const loadedModel = await tf.loadLayersModel(modelUrl)
    return loadedModel
  } catch (error) {
    // Deliberate best effort: log the failure and fall back to a new model.
    console.warn(error)
    console.warn(`Unable to load a model. Creating a new model`)
    return buildUntrainedModel()
  }
}
2 changes: 1 addition & 1 deletion trainer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "",
"main": "index.js",
"scripts": {
"build": "npx ts-node trainer.ts"
"start": "ts-node trainer.ts"
},
"keywords": [],
"author": "",
Expand Down
Loading