00 embeddings before llm
In [2]:
Copied!
import pandas as pd
import pandas as pd
In [3]:
Copied!
# Vocabulary: every distinct lowercase word of the three sentences, kept in
# first-seen order (dict.fromkeys drops repeats while preserving insertion order).
bags_of_wods = "What a sunny day Such bright skies today I have not seen a sunny day in weeks".lower().split(" ")
bags_of_wods = list(dict.fromkeys(bags_of_wods))

kalimat_1 = "what a sunny day"
kalimat_2 = "such bright skies today"
kalimat_3 = "i have not seen a sunny day in weeks"


def _sparse_embedding(sentence, vocabulary):
    """Build the sparse (bag-of-words) row of ``sentence`` over ``vocabulary``.

    Parameters
    ----------
    sentence : str
        Space-separated words.
    vocabulary : list of str
        Words that define the positions of the result.

    Returns
    -------
    list
        One entry per vocabulary word, in vocabulary order: ``(1, word)``
        when the word occurs in ``sentence``, otherwise ``0``.
    """
    # Split once per sentence instead of once per vocabulary word.
    words_in_sentence = set(sentence.split(" "))
    return [(1, word) if word in words_in_sentence else 0 for word in vocabulary]


sparse_embeddings_kalimat1 = _sparse_embedding(kalimat_1, bags_of_wods)
sparse_embeddings_kalimat2 = _sparse_embedding(kalimat_2, bags_of_wods)
sparse_embeddings_kalimat3 = _sparse_embedding(kalimat_3, bags_of_wods)

# One row per vocabulary word, one column per sentence (the sentence text is the column name).
df_embeddings = pd.DataFrame({
    'bag_p_word': bags_of_wods,
    kalimat_1: sparse_embeddings_kalimat1,
    kalimat_2: sparse_embeddings_kalimat2,
    kalimat_3: sparse_embeddings_kalimat3,
})
df_embeddings
Out[3]:
| | bag_p_word | what a sunny day | such bright skies today | i have not seen a sunny day in weeks |
|---|---|---|---|---|
| 0 | what | (1, what) | 0 | 0 |
| 1 | a | (1, a) | 0 | (1, a) |
| 2 | sunny | (1, sunny) | 0 | (1, sunny) |
| 3 | day | (1, day) | 0 | (1, day) |
| 4 | such | 0 | (1, such) | 0 |
| 5 | bright | 0 | (1, bright) | 0 |
| 6 | skies | 0 | (1, skies) | 0 |
| 7 | today | 0 | (1, today) | 0 |
| 8 | i | 0 | 0 | (1, i) |
| 9 | have | 0 | 0 | (1, have) |
| 10 | not | 0 | 0 | (1, not) |
| 11 | seen | 0 | 0 | (1, seen) |
| 12 | in | 0 | 0 | (1, in) |
| 13 | weeks | 0 | 0 | (1, weeks) |
In [16]:
Copied!
def parse_value(a):
    """Return the presence flag stored in one sparse-embedding cell.

    Parameters
    ----------
    a : int or indexable
        Either a bare ``int`` (returned unchanged) or an indexable value
        such as ``(1, word)``, whose first element is returned.

    Returns
    -------
    The integer itself, or ``a[0]`` for non-integer input.
    """
    # Integer cells already are the flag; anything else is unpacked by index.
    return a if isinstance(a, int) else a[0]
# Reduce the mixed 0 / (1, word) cells of the third sentence's column to plain flags.
sparse_embeddings = [parse_value(a) for a in df_embeddings[kalimat_3]]
print(sparse_embeddings)
[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
Dapat kita lihat bahwa sparse sequence untuk kalimat "i have not seen a sunny day in weeks" adalah [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1].
In [ ]:
Copied!