-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmitie.go
127 lines (106 loc) · 2.5 KB
/
mitie.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package linguo
/*
#cgo LDFLAGS: -L/usr/local/lib -lmitie
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mitie.h"
// Entity is the plain C view of a single named-entity detection that
// get_entity hands back to the Go side.
typedef struct {
// Tag string of the detection, as returned by mitie_ner_get_detection_tagstr.
const char* model;
// Score of the detection, as returned by mitie_ner_get_detection_score.
double score;
// Text of the detection: the covered tokens, each one preceded by a single space.
const char* value;
} Entity;
char * my_strcat(const char * str1, const char * str2);
// get_entity describes detection number i of dets as an Entity.
//
// tokens is the token array the detections were extracted from; the
// detection's position and length index into it.
//
// The returned fields are:
//   model - the tag string reported by MITIE for the detection (not copied).
//   score - the score reported by MITIE for the detection.
//   value - the covered tokens joined together, each one preceded by a
//           single space (so the text starts with a space).
//
// Ownership of value: when the detection covers at least one token, value is
// allocated with malloc and the caller must free() it. When the detection
// covers no tokens, value is a static empty string that must NOT be freed.
// If the allocation fails, value is NULL.
//
// The text is sized up front and written into one buffer, so no intermediate
// strings are allocated (and none can be leaked) while it is assembled.
Entity get_entity(char** tokens,
                  const mitie_named_entity_detections* dets,
                  unsigned long i)
{
    Entity entity;
    unsigned long pos = mitie_ner_get_detection_position(dets, i);
    unsigned long len = mitie_ner_get_detection_length(dets, i);
    unsigned long k;
    size_t size = 1; // room for the terminating NUL
    char* buf;
    char* out;

    entity.score = mitie_ner_get_detection_score(dets, i);
    entity.model = mitie_ner_get_detection_tagstr(dets, i);
    entity.value = "";

    if (len == 0)
        return entity;

    // First pass: one separator plus the token itself for every token.
    for (k = 0; k < len; k++)
        size += 1 + strlen(tokens[pos + k]);

    buf = malloc(size);
    if (buf == NULL) {
        // Report the failure as a NULL value instead of dereferencing it.
        entity.value = NULL;
        return entity;
    }

    // Second pass: write " token" for every token into the single buffer.
    out = buf;
    for (k = 0; k < len; k++) {
        size_t n = strlen(tokens[pos + k]);
        *out++ = ' ';
        memcpy(out, tokens[pos + k], n);
        out += n;
    }
    *out = '\0';

    entity.value = buf;
    return entity;
}
// my_strcat returns a freshly malloc'd string holding str1 followed by str2,
// or NULL when the allocation fails. Neither input is modified; the caller
// owns the result and releases it with free().
char * my_strcat(const char * str1, const char * str2)
{
    const size_t head = strlen(str1);
    const size_t tail = strlen(str2);
    char * joined = malloc(head + tail + 1);

    if (joined == NULL)
        return NULL;

    memcpy(joined, str1, head);
    memcpy(joined + head, str2, tail + 1); // tail + 1 also copies the NUL
    return joined;
}
// releaseTokens passes the token array to mitie_free.
void releaseTokens(char** tokens)
{
    mitie_free(tokens);
}
// releaseDets passes the detections object to mitie_free.
void releaseDets(mitie_named_entity_detections* dets)
{
    mitie_free(dets);
}
*/
import "C"
import (
"fmt"
"strings"
"unsafe"
"github.com/abiosoft/semaphore"
"github.com/ruggi/linguo/models"
set "gopkg.in/fatih/set.v0"
)
// MITIE wraps a named entity extractor loaded through the MITIE C library.
type MITIE struct {
// ner is the C-side extractor handle returned by
// mitie_load_named_entity_extractor; Release frees it with mitie_free.
ner *C.mitie_named_entity_extractor
// sem is the semaphore built by semaphore.New(4) in NewMITIE.
// NOTE(review): no method visible in this file acquires it — confirm it is
// still needed.
sem *semaphore.Semaphore
}
func NewMITIE(filepath string) *MITIE {
ner := C.mitie_load_named_entity_extractor(C.CString(filepath))
sem := semaphore.New(4)
return &MITIE{
ner: ner,
sem: sem,
}
}
func (this *MITIE) Release() {
C.mitie_free(unsafe.Pointer(this.ner))
}
func (this *MITIE) Process(body string) []*models.Entity {
tokens := C.mitie_tokenize(C.CString(body))
if tokens == nil {
return nil
}
defer C.mitie_free(unsafe.Pointer(tokens))
dets := C.mitie_extract_entities(this.ner, tokens)
if dets == nil {
return nil
}
defer C.mitie_free(unsafe.Pointer(dets))
num_dets := C.mitie_ner_get_num_detections(dets)
duplicates := set.New()
var entities []*models.Entity
for i := 0; i < int(num_dets); i++ {
centity := C.get_entity(tokens, dets, C.ulong(i))
model := C.GoString(centity.model)
score := float64(centity.score)
value := C.GoString(centity.value)
key := fmt.Sprintf("%s:%s", value, model)
if duplicates.Has(key) {
continue
}
duplicates.Add(key)
if score > 0.5 {
entity := models.NewEntity(model, score, strings.TrimSpace(value))
entities = append(entities, entity)
}
}
return entities
}