FlairでELMoをEmbeddingとして使う
from flair.data import Corpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.embeddings import (
WordEmbeddings,
CharacterEmbeddings,
FlairEmbeddings,
BertEmbeddings,
)
from flair.data import Corpus
from flair.datasets import ColumnCorpus
import flair.datasets
def ElmoEmbeddings(
    options: str = "/path/to/elmo/elmo_options.json",
    weights: str = "/path/to/elmo/elmo_weights.hdf5",
):
    """Build a Flair ``ELMoEmbeddings`` object from custom ELMo model files.

    Args:
        options: Path to the ELMo options JSON file. Defaults to the
            placeholder path that was previously hard-coded here.
        weights: Path to the ELMo weights HDF5 file. Defaults to the
            placeholder path that was previously hard-coded here.

    Returns:
        The ``ELMoEmbeddings`` instance created from the two files.
    """
    # Kept as a function-local import, as in the original, so importing
    # this module does not itself import ELMoEmbeddings.
    from flair.embeddings import ELMoEmbeddings

    return ELMoEmbeddings("custom", options_file=options, weight_file=weights)
if __name__ == "__main__":
    # ``List`` is used in a variable annotation below. Module-level
    # annotations are evaluated at runtime, and none of the visible imports
    # bring ``List`` into scope, so it is imported here.
    from typing import List

    from flair.trainers import ModelTrainer

    # Folder containing train.tsv, test.tsv and devel.tsv.
    datapath = "/path/to/BIO"
    # Directory handed to the trainer as its output location.
    resultpath = "/path/to/result"
    tag_type = "ner"

    # NOTE(review): ``loadCorpus`` is not defined in the visible part of this
    # file; it presumably wraps ``ColumnCorpus`` -- confirm it exists before
    # running.
    corpus: Corpus = loadCorpus(datapath)
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # ELMo is the only embedding; it is still wrapped in StackedEmbeddings
    # so further embeddings can be appended to the list later.
    embedding_objects: List[TokenEmbeddings] = [ElmoEmbeddings()]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(
        resultpath,
        learning_rate=0.1,
        mini_batch_size=128,
        max_epochs=3,
        patience=5,
        embeddings_storage_mode="gpu",
    )
from flair.data import Corpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.embeddings import (
WordEmbeddings,
CharacterEmbeddings,
FlairEmbeddings,
BertEmbeddings,
)
from flair.data import Corpus
from flair.datasets import ColumnCorpus
import flair.datasets
def ElmoEmbeddings(
    options: str = "/path/to/elmo/elmo_options.json",
    weights: str = "/path/to/elmo/elmo_weights.hdf5",
):
    """Build a Flair ``ELMoEmbeddings`` object from custom ELMo model files.

    Args:
        options: Path to the ELMo options JSON file. Defaults to the
            placeholder path that was previously hard-coded here.
        weights: Path to the ELMo weights HDF5 file. Defaults to the
            placeholder path that was previously hard-coded here.

    Returns:
        The ``ELMoEmbeddings`` instance created from the two files.
    """
    # Kept as a function-local import, as in the original, so importing
    # this module does not itself import ELMoEmbeddings.
    from flair.embeddings import ELMoEmbeddings

    return ELMoEmbeddings("custom", options_file=options, weight_file=weights)
if __name__ == "__main__":
    # ``List`` is used in a variable annotation below. Module-level
    # annotations are evaluated at runtime, and none of the visible imports
    # bring ``List`` into scope, so it is imported here.
    from typing import List

    from flair.trainers import ModelTrainer

    # Folder containing train.tsv, test.tsv and devel.tsv.
    datapath = "/path/to/BIO"
    # Directory handed to the trainer as its output location.
    resultpath = "/path/to/result"
    tag_type = "ner"

    # NOTE(review): ``loadCorpus`` is not defined in the visible part of this
    # file; it presumably wraps ``ColumnCorpus`` -- confirm it exists before
    # running.
    corpus: Corpus = loadCorpus(datapath)
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # ELMo is the only embedding; it is still wrapped in StackedEmbeddings
    # so further embeddings can be appended to the list later.
    embedding_objects: List[TokenEmbeddings] = [ElmoEmbeddings()]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(
        resultpath,
        learning_rate=0.1,
        mini_batch_size=128,
        max_epochs=3,
        patience=5,
        embeddings_storage_mode="gpu",
    )
from typing import List

from flair.data import Corpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.embeddings import (
    WordEmbeddings,
    CharacterEmbeddings,
    FlairEmbeddings,
    BertEmbeddings,
)
from flair.data import Corpus
from flair.datasets import ColumnCorpus
import flair.datasets


def ElmoEmbeddings(
    options: str = "/path/to/elmo/elmo_options.json",
    weights: str = "/path/to/elmo/elmo_weights.hdf5",
):
    """Build a Flair ``ELMoEmbeddings`` object from custom ELMo model files.

    Args:
        options: Path to the ELMo options JSON file. Defaults to the
            placeholder path that was previously hard-coded here.
        weights: Path to the ELMo weights HDF5 file. Defaults to the
            placeholder path that was previously hard-coded here.

    Returns:
        The ``ELMoEmbeddings`` instance created from the two files.
    """
    # Kept as a function-local import, as in the original, so importing
    # this module does not itself import ELMoEmbeddings.
    from flair.embeddings import ELMoEmbeddings

    return ELMoEmbeddings("custom", options_file=options, weight_file=weights)


if __name__ == "__main__":
    from flair.trainers import ModelTrainer

    # Folder containing train.tsv, test.tsv and devel.tsv.
    datapath = "/path/to/BIO"
    # Directory handed to the trainer as its output location.
    resultpath = "/path/to/result"
    tag_type = "ner"

    # NOTE(review): ``loadCorpus`` is not defined in the visible part of this
    # file; it presumably wraps ``ColumnCorpus`` -- confirm it exists before
    # running.
    corpus: Corpus = loadCorpus(datapath)
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # ELMo is the only embedding; it is still wrapped in StackedEmbeddings
    # so further embeddings can be appended to the list later.
    embedding_objects: List[TokenEmbeddings] = [ElmoEmbeddings()]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(
        resultpath,
        learning_rate=0.1,
        mini_batch_size=128,
        max_epochs=3,
        patience=5,
        embeddings_storage_mode="gpu",
    )
0
ディスカッション
コメント一覧
まだ、コメントがありません