metadata language: []
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- dataset_size:1K<n<10K
- loss:MatryoshkaLoss
- loss:CoSENTLoss
base_model: intfloat/multilingual-e5-large
metrics:
- pearson_cosine
- spearman_cosine
- pearson_manhattan
- spearman_manhattan
- pearson_euclidean
- spearman_euclidean
- pearson_dot
- spearman_dot
- pearson_max
- spearman_max
widget:
- source_sentence: El hombre captura una pelota
sentences:
- Un hombre lanza una pelota en el aire.
- Un hombre está acompañando a una mujer en el camino.
- Dos mujeres están cantando una hermosa canción.
- source_sentence: La mujer está cortando papas.
sentences:
- Una mujer está cortando patatas.
- Los patos blancos se encuentran parados en el suelo.
- Hay una banda tocando en el escenario principal.
- source_sentence: Un hombre está buscando algo.
sentences:
- En un mercado de granjeros, se encuentra un hombre.
- Romney filmó en una reunión privada de financiadores
- Dos perros de color negro están jugando en la hierba.
- source_sentence: Un hombre saltando la cuerda.
sentences:
- Un hombre está saltando la cuerda.
- La capital de Siria fue golpeada por dos explosiones
- Los gatitos están comiendo de los platos.
- source_sentence: El avión está tocando tierra.
sentences:
- El avión animado se encuentra en proceso de aterrizaje.
- Un pequeño niño montado en un columpio en el parque.
- Una persona de sexo femenino está cortando una cebolla.
pipeline_tag: sentence-similarity
model-index:
- name: SentenceTransformer based on intfloat/multilingual-e5-large
results:
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 768
type: sts-dev-768
metrics:
- type: pearson_cosine
value: 0.8382359637067547
name: Pearson Cosine
- type: spearman_cosine
value: 0.8429605562993187
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8336600898033378
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8448900621318144
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8328580183902631
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8441561677427524
name: Spearman Euclidean
- type: pearson_dot
value: 0.8287262441829462
name: Pearson Dot
- type: spearman_dot
value: 0.8322746204974042
name: Spearman Dot
- type: pearson_max
value: 0.8382359637067547
name: Pearson Max
- type: spearman_max
value: 0.8448900621318144
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 512
type: sts-dev-512
metrics:
- type: pearson_cosine
value: 0.8334610747047482
name: Pearson Cosine
- type: spearman_cosine
value: 0.8405630189692351
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8316848819512679
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8426142019940397
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8305903222472721
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8415256700272777
name: Spearman Euclidean
- type: pearson_dot
value: 0.8172993617433827
name: Pearson Dot
- type: spearman_dot
value: 0.823043401157181
name: Spearman Dot
- type: pearson_max
value: 0.8334610747047482
name: Pearson Max
- type: spearman_max
value: 0.8426142019940397
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 256
type: sts-dev-256
metrics:
- type: pearson_cosine
value: 0.8240056098321313
name: Pearson Cosine
- type: spearman_cosine
value: 0.8355774999921849
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8261458415991961
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8355100986320139
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.825647934422587
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8362336344962497
name: Spearman Euclidean
- type: pearson_dot
value: 0.7924886689283153
name: Pearson Dot
- type: spearman_dot
value: 0.7992788592975302
name: Spearman Dot
- type: pearson_max
value: 0.8261458415991961
name: Pearson Max
- type: spearman_max
value: 0.8362336344962497
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 128
type: sts-dev-128
metrics:
- type: pearson_cosine
value: 0.8098656853945027
name: Pearson Cosine
- type: spearman_cosine
value: 0.8304511476467773
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8208946291392102
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8308359029901535
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8195023110971954
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8302481276550623
name: Spearman Euclidean
- type: pearson_dot
value: 0.7412744037070784
name: Pearson Dot
- type: spearman_dot
value: 0.7489986968697009
name: Spearman Dot
- type: pearson_max
value: 0.8208946291392102
name: Pearson Max
- type: spearman_max
value: 0.8308359029901535
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 64
type: sts-dev-64
metrics:
- type: pearson_cosine
value: 0.7777717898212414
name: Pearson Cosine
- type: spearman_cosine
value: 0.8152005256760807
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8007095698339157
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8116493253806699
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8000905317852872
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8110794468804238
name: Spearman Euclidean
- type: pearson_dot
value: 0.6540905690432955
name: Pearson Dot
- type: spearman_dot
value: 0.6589924104221199
name: Spearman Dot
- type: pearson_max
value: 0.8007095698339157
name: Pearson Max
- type: spearman_max
value: 0.8152005256760807
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 32
type: sts-dev-32
metrics:
- type: pearson_cosine
value: 0.7276908730898617
name: Pearson Cosine
- type: spearman_cosine
value: 0.7805691037554072
name: Spearman Cosine
- type: pearson_manhattan
value: 0.7659952363354546
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.7751944660837697
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.7674462214503804
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.7773298298599879
name: Spearman Euclidean
- type: pearson_dot
value: 0.5395044219284906
name: Pearson Dot
- type: spearman_dot
value: 0.5341543426421572
name: Spearman Dot
- type: pearson_max
value: 0.7674462214503804
name: Pearson Max
- type: spearman_max
value: 0.7805691037554072
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts dev 16
type: sts-dev-16
metrics:
- type: pearson_cosine
value: 0.6737235484120327
name: Pearson Cosine
- type: spearman_cosine
value: 0.7425360948217027
name: Spearman Cosine
- type: pearson_manhattan
value: 0.7187007732867645
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.7279621825071231
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.7234911258158329
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.7374355146279606
name: Spearman Euclidean
- type: pearson_dot
value: 0.44701957007430754
name: Pearson Dot
- type: spearman_dot
value: 0.44243975098384164
name: Spearman Dot
- type: pearson_max
value: 0.7234911258158329
name: Pearson Max
- type: spearman_max
value: 0.7425360948217027
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 768
type: sts-test-768
metrics:
- type: pearson_cosine
value: 0.8637130740455785
name: Pearson Cosine
- type: spearman_cosine
value: 0.8774757245850818
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8739327947840198
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8771247494149252
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8742964420051067
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8774039769000851
name: Spearman Euclidean
- type: pearson_dot
value: 0.8587248460103846
name: Pearson Dot
- type: spearman_dot
value: 0.8692624735733635
name: Spearman Dot
- type: pearson_max
value: 0.8742964420051067
name: Pearson Max
- type: spearman_max
value: 0.8774757245850818
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 512
type: sts-test-512
metrics:
- type: pearson_cosine
value: 0.8608902316971913
name: Pearson Cosine
- type: spearman_cosine
value: 0.8761454408181157
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8723366100239835
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8755119028724399
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8727143818945785
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8758699632438892
name: Spearman Euclidean
- type: pearson_dot
value: 0.8498181878456328
name: Pearson Dot
- type: spearman_dot
value: 0.8568165420931783
name: Spearman Dot
- type: pearson_max
value: 0.8727143818945785
name: Pearson Max
- type: spearman_max
value: 0.8761454408181157
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 256
type: sts-test-256
metrics:
- type: pearson_cosine
value: 0.8546354043013908
name: Pearson Cosine
- type: spearman_cosine
value: 0.871536658256446
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8697716394077537
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8737030599161743
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.86989853825415
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8736845554686979
name: Spearman Euclidean
- type: pearson_dot
value: 0.8131428680674924
name: Pearson Dot
- type: spearman_dot
value: 0.8076436370339797
name: Spearman Dot
- type: pearson_max
value: 0.86989853825415
name: Pearson Max
- type: spearman_max
value: 0.8737030599161743
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 128
type: sts-test-128
metrics:
- type: pearson_cosine
value: 0.8387977115140051
name: Pearson Cosine
- type: spearman_cosine
value: 0.8645489592292456
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8611375341227384
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8667215229295422
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.862154474303328
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8680162798983022
name: Spearman Euclidean
- type: pearson_dot
value: 0.7492475609746636
name: Pearson Dot
- type: spearman_dot
value: 0.7363955675375832
name: Spearman Dot
- type: pearson_max
value: 0.862154474303328
name: Pearson Max
- type: spearman_max
value: 0.8680162798983022
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 64
type: sts-test-64
metrics:
- type: pearson_cosine
value: 0.8168102869303625
name: Pearson Cosine
- type: spearman_cosine
value: 0.8585329796388539
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8518107264951738
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8606717941407515
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8533959511853835
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8623753165991692
name: Spearman Euclidean
- type: pearson_dot
value: 0.6646337116783656
name: Pearson Dot
- type: spearman_dot
value: 0.6473141838302237
name: Spearman Dot
- type: pearson_max
value: 0.8533959511853835
name: Pearson Max
- type: spearman_max
value: 0.8623753165991692
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 32
type: sts-test-32
metrics:
- type: pearson_cosine
value: 0.7813945227753345
name: Pearson Cosine
- type: spearman_cosine
value: 0.8424823964509079
name: Spearman Cosine
- type: pearson_manhattan
value: 0.8315336527432531
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.8431756901550471
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.8345328653107531
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8466076672836096
name: Spearman Euclidean
- type: pearson_dot
value: 0.5520860449837447
name: Pearson Dot
- type: spearman_dot
value: 0.5319238671245338
name: Spearman Dot
- type: pearson_max
value: 0.8345328653107531
name: Pearson Max
- type: spearman_max
value: 0.8466076672836096
name: Spearman Max
- task:
type: semantic-similarity
name: Semantic Similarity
dataset:
name: sts test 16
type: sts-test-16
metrics:
- type: pearson_cosine
value: 0.7198004009567176
name: Pearson Cosine
- type: spearman_cosine
value: 0.8072120165730962
name: Spearman Cosine
- type: pearson_manhattan
value: 0.7805727606105963
name: Pearson Manhattan
- type: spearman_manhattan
value: 0.7997833060148871
name: Spearman Manhattan
- type: pearson_euclidean
value: 0.7879106231813758
name: Pearson Euclidean
- type: spearman_euclidean
value: 0.8090073332632988
name: Spearman Euclidean
- type: pearson_dot
value: 0.44957276876149327
name: Pearson Dot
- type: spearman_dot
value: 0.4411623904572447
name: Spearman Dot
- type: pearson_max
value: 0.7879106231813758
name: Pearson Max
- type: spearman_max
value: 0.8090073332632988
name: Spearman Max
SentenceTransformer based on intfloat/multilingual-e5-large
This is a sentence-transformers model fine-tuned from intfloat/multilingual-e5-large on an augmented version of the stsb_multi_es dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
Model Type: Sentence Transformer
Base model: intfloat/multilingual-e5-large
Maximum Sequence Length: 512 tokens
Output Dimensionality: 1024 dimensions
Similarity Function: Cosine Similarity
Training Dataset:
Model Sources
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e" )
sentences = [
'El avión está tocando tierra.' ,
'El avión animado se encuentra en proceso de aterrizaje.' ,
'Un pequeño niño montado en un columpio en el parque.' ,
]
embeddings = model.encode(sentences)
print (embeddings.shape)
similarities = model.similarity(embeddings, embeddings)
print (similarities.shape)
Evaluation
Metrics
Semantic Similarity
Metric
Value
pearson_cosine
0.8382
spearman_cosine
0.843
pearson_manhattan
0.8337
spearman_manhattan
0.8449
pearson_euclidean
0.8329
spearman_euclidean
0.8442
pearson_dot
0.8287
spearman_dot
0.8323
pearson_max
0.8382
spearman_max
0.8449
Semantic Similarity
Metric
Value
pearson_cosine
0.8335
spearman_cosine
0.8406
pearson_manhattan
0.8317
spearman_manhattan
0.8426
pearson_euclidean
0.8306
spearman_euclidean
0.8415
pearson_dot
0.8173
spearman_dot
0.823
pearson_max
0.8335
spearman_max
0.8426
Semantic Similarity
Metric
Value
pearson_cosine
0.824
spearman_cosine
0.8356
pearson_manhattan
0.8261
spearman_manhattan
0.8355
pearson_euclidean
0.8256
spearman_euclidean
0.8362
pearson_dot
0.7925
spearman_dot
0.7993
pearson_max
0.8261
spearman_max
0.8362
Semantic Similarity
Metric
Value
pearson_cosine
0.8099
spearman_cosine
0.8305
pearson_manhattan
0.8209
spearman_manhattan
0.8308
pearson_euclidean
0.8195
spearman_euclidean
0.8302
pearson_dot
0.7413
spearman_dot
0.749
pearson_max
0.8209
spearman_max
0.8308
Semantic Similarity
Metric
Value
pearson_cosine
0.7778
spearman_cosine
0.8152
pearson_manhattan
0.8007
spearman_manhattan
0.8116
pearson_euclidean
0.8001
spearman_euclidean
0.8111
pearson_dot
0.6541
spearman_dot
0.659
pearson_max
0.8007
spearman_max
0.8152
Semantic Similarity
Metric
Value
pearson_cosine
0.7277
spearman_cosine
0.7806
pearson_manhattan
0.766
spearman_manhattan
0.7752
pearson_euclidean
0.7674
spearman_euclidean
0.7773
pearson_dot
0.5395
spearman_dot
0.5342
pearson_max
0.7674
spearman_max
0.7806
Semantic Similarity
Metric
Value
pearson_cosine
0.6737
spearman_cosine
0.7425
pearson_manhattan
0.7187
spearman_manhattan
0.728
pearson_euclidean
0.7235
spearman_euclidean
0.7374
pearson_dot
0.447
spearman_dot
0.4424
pearson_max
0.7235
spearman_max
0.7425
Semantic Similarity
Metric
Value
pearson_cosine
0.8637
spearman_cosine
0.8775
pearson_manhattan
0.8739
spearman_manhattan
0.8771
pearson_euclidean
0.8743
spearman_euclidean
0.8774
pearson_dot
0.8587
spearman_dot
0.8693
pearson_max
0.8743
spearman_max
0.8775
Semantic Similarity
Metric
Value
pearson_cosine
0.8609
spearman_cosine
0.8761
pearson_manhattan
0.8723
spearman_manhattan
0.8755
pearson_euclidean
0.8727
spearman_euclidean
0.8759
pearson_dot
0.8498
spearman_dot
0.8568
pearson_max
0.8727
spearman_max
0.8761
Semantic Similarity
Metric
Value
pearson_cosine
0.8546
spearman_cosine
0.8715
pearson_manhattan
0.8698
spearman_manhattan
0.8737
pearson_euclidean
0.8699
spearman_euclidean
0.8737
pearson_dot
0.8131
spearman_dot
0.8076
pearson_max
0.8699
spearman_max
0.8737
Semantic Similarity
Metric
Value
pearson_cosine
0.8388
spearman_cosine
0.8645
pearson_manhattan
0.8611
spearman_manhattan
0.8667
pearson_euclidean
0.8622
spearman_euclidean
0.868
pearson_dot
0.7492
spearman_dot
0.7364
pearson_max
0.8622
spearman_max
0.868
Semantic Similarity
Metric
Value
pearson_cosine
0.8168
spearman_cosine
0.8585
pearson_manhattan
0.8518
spearman_manhattan
0.8607
pearson_euclidean
0.8534
spearman_euclidean
0.8624
pearson_dot
0.6646
spearman_dot
0.6473
pearson_max
0.8534
spearman_max
0.8624
Semantic Similarity
Metric
Value
pearson_cosine
0.7814
spearman_cosine
0.8425
pearson_manhattan
0.8315
spearman_manhattan
0.8432
pearson_euclidean
0.8345
spearman_euclidean
0.8466
pearson_dot
0.5521
spearman_dot
0.5319
pearson_max
0.8345
spearman_max
0.8466
Semantic Similarity
Metric
Value
pearson_cosine
0.7198
spearman_cosine
0.8072
pearson_manhattan
0.7806
spearman_manhattan
0.7998
pearson_euclidean
0.7879
spearman_euclidean
0.809
pearson_dot
0.4496
spearman_dot
0.4412
pearson_max
0.7879
spearman_max
0.809
Training Details
Training Dataset
stsb_multi_es_aug
Dataset: stsb_multi_es_aug
Size: 2,697 training samples
Columns: sentence1, sentence2, and score
Approximate statistics based on the first 1000 samples:
sentence1
sentence2
score
type
string
string
float
details
min: 8 tokens mean: 22.25 tokens max: 68 tokens
min: 8 tokens mean: 22.01 tokens max: 79 tokens
min: 0.0 mean: 2.67 max: 5.0
Samples:
sentence1
sentence2
score
El pájaro de tamaño reducido se posó con delicadeza en una rama cubierta de escarcha.
Un ave de color amarillo descansaba tranquilamente en una rama.
3.200000047683716
Una chica está tocando la flauta en un parque.
Un grupo de músicos está tocando en un escenario al aire libre.
1.286
La aclamada escritora británica, Doris Lessing, galardonada con el premio Nobel, fallece
La destacada autora británica, Doris Lessing, reconocida con el prestigioso Premio Nobel, muere
4.199999809265137
Loss: MatryoshkaLoss with these parameters: {
"loss" : "CoSENTLoss" ,
"matryoshka_dims" : [
768 ,
512 ,
256 ,
128 ,
64 ,
32 ,
16
] ,
"matryoshka_weights" : [
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1
] ,
"n_dims_per_step" : -1
}
Evaluation Dataset
stsb_multi_es_aug
Dataset: stsb_multi_es_aug
Size: 697 evaluation samples
Columns: sentence1, sentence2, and score
Approximate statistics based on the first 1000 samples (this split has only 697 samples, so all of them are covered):
sentence1
sentence2
score
type
string
string
float
details
min: 8 tokens mean: 22.76 tokens max: 67 tokens
min: 7 tokens mean: 22.26 tokens max: 63 tokens
min: 0.0 mean: 2.3 max: 5.0
Samples:
sentence1
sentence2
score
Un incendio ocurrido en un hospital psiquiátrico ruso resultó en la trágica muerte de 38 personas.
Se teme que el incendio en un hospital psiquiátrico ruso cause la pérdida de la vida de 38 individuos.
4.199999809265137
"Street dijo que el otro individuo a veces se siente avergonzado de su fiesta, lo cual provoca risas en la multitud"
"A veces, el otro tipo se encuentra avergonzado de su fiesta y no se le puede culpar."
3.5
El veterano diplomático de Malasia tuvo un encuentro con Suu Kyi el miércoles en la casa del lago en Yangon donde permanece bajo arresto domiciliario.
Razali Ismail tuvo una reunión de 90 minutos con Suu Kyi, quien ganó el Premio Nobel de la Paz en 1991, en su casa del lago donde está recluida.
3.691999912261963
Loss: MatryoshkaLoss with these parameters: {
"loss" : "CoSENTLoss" ,
"matryoshka_dims" : [
768 ,
512 ,
256 ,
128 ,
64 ,
32 ,
16
] ,
"matryoshka_weights" : [
1 ,
1 ,
1 ,
1 ,
1 ,
1 ,
1
] ,
"n_dims_per_step" : -1
}
Training Hyperparameters
Non-Default Hyperparameters
eval_strategy: steps
per_device_train_batch_size: 16
per_device_eval_batch_size: 16
num_train_epochs: 5
warmup_ratio: 0.1
fp16: True
All Hyperparameters
Click to expand
overwrite_output_dir: False
do_predict: False
eval_strategy: steps
prediction_loss_only: True
per_device_train_batch_size: 16
per_device_eval_batch_size: 16
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 1
eval_accumulation_steps: None
learning_rate: 5e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 5
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.1
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: True
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: False
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
batch_sampler: batch_sampler
multi_dataset_batch_sampler: proportional
Training Logs
Epoch
Step
Training Loss
loss
sts-dev-128_spearman_cosine
sts-dev-16_spearman_cosine
sts-dev-256_spearman_cosine
sts-dev-32_spearman_cosine
sts-dev-512_spearman_cosine
sts-dev-64_spearman_cosine
sts-dev-768_spearman_cosine
sts-test-128_spearman_cosine
sts-test-16_spearman_cosine
sts-test-256_spearman_cosine
sts-test-32_spearman_cosine
sts-test-512_spearman_cosine
sts-test-64_spearman_cosine
sts-test-768_spearman_cosine
0.5917
100
30.7503
30.6172
0.8117
0.7110
0.8179
0.7457
0.8244
0.7884
0.8252
-
-
-
-
-
-
-
1.1834
200
30.4696
32.6422
0.7952
0.7198
0.8076
0.7491
0.8125
0.7813
0.8142
-
-
-
-
-
-
-
1.7751
300
29.9233
31.5469
0.8152
0.7435
0.8250
0.7737
0.8302
0.8006
0.8305
-
-
-
-
-
-
-
2.3669
400
29.0716
31.8088
0.8183
0.7405
0.8248
0.7758
0.8299
0.8057
0.8324
-
-
-
-
-
-
-
2.9586
500
28.7971
32.6032
0.8176
0.7430
0.8241
0.7777
0.8289
0.8025
0.8316
-
-
-
-
-
-
-
3.5503
600
27.4766
34.7911
0.8241
0.7400
0.8314
0.7730
0.8369
0.8061
0.8394
-
-
-
-
-
-
-
4.1420
700
27.0639
35.7418
0.8294
0.7466
0.8354
0.7784
0.8389
0.8107
0.8409
-
-
-
-
-
-
-
4.7337
800
26.5119
36.2014
0.8305
0.7425
0.8356
0.7806
0.8406
0.8152
0.8430
-
-
-
-
-
-
-
5.0
845
-
-
-
-
-
-
-
-
-
0.8645
0.8072
0.8715
0.8425
0.8761
0.8585
0.8775
Framework Versions
Python: 3.10.12
Sentence Transformers: 3.0.0
Transformers: 4.41.1
PyTorch: 2.3.0+cu121
Accelerate: 0.30.1
Datasets: 2.19.1
Tokenizers: 0.19.1
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
MatryoshkaLoss
@misc{kusupati2024matryoshka,
title={Matryoshka Representation Learning},
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
year={2024},
eprint={2205.13147},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
CoSENTLoss
@online{kexuefm-8847,
title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
author={Su Jianlin},
year={2022},
month={Jan},
url={https://kexue.fm/archives/8847},
}