6. Text documents into PGVectorStore
Storing, accessing, and modifying data in a vector store¶
In [1]:
from langchain_postgres import PGVectorStore, PGEngine
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama.embeddings import OllamaEmbeddings
To use PostgreSQL as a vector store you first need a PGEngine object. The PGEngine configures a shared connection pool to your Postgres database, which is an industry best practice for managing the number of connections and reducing latency through cached database connections.
In [2]:
## Create the connection first using the PGEngine class
CONNECTION = "postgresql+psycopg://farras:farras@localhost:6060/farraslang"
## Engine
pg_engine = PGEngine.from_connection_string(CONNECTION)
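The cell above builds the pool from a connection string with default settings. Pool parameters can also be tuned by constructing the SQLAlchemy engine yourself; the sketch below assumes PGEngine.from_engine accepts an AsyncEngine (verify against the langchain_postgres docs before relying on it).
from sqlalchemy.ext.asyncio import create_async_engine

## A sketch, not from the original notebook: size the pool explicitly.
## PGEngine.from_engine and its signature are assumptions here.
sa_engine = create_async_engine(
    "postgresql+psycopg://farras:farras@localhost:6060/farraslang",
    pool_size=5,
    max_overflow=10,
)
pg_engine = PGEngine.from_engine(sa_engine)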
In [3]:
## Embeddings
model_embeddings = OllamaEmbeddings(model="nomic-embed-text")
## Existing table name
TABLE_NAME = 'my_pg_vector_store'
## Initialize the PGVectorStore table
# This only needs to run once
try:
    await pg_engine.ainit_vectorstore_table(
        table_name=TABLE_NAME,
        vector_size=768
    )
except Exception as e:
    print(f"The table may already exist: {e}")
## The code above creates a table named my_pg_vector_store with the columns:
## * langchain_id
## * content
## * embedding
## * langchain_metadata
The table may already exist: (psycopg.errors.DuplicateTable) relation "my_pg_vector_store" already exists
[SQL: CREATE TABLE "public"."my_pg_vector_store"(
"langchain_id" UUID PRIMARY KEY,
"content" TEXT NOT NULL,
"embedding" vector(768) NOT NULL
,
"langchain_metadata" JSON
);]
(Background on this error at: https://sqlalche.me/e/20/f405)
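If you want to rebuild the table from scratch instead of swallowing the error, you can drop it with plain SQL before re-running ainit_vectorstore_table. A minimal sketch using psycopg directly, assuming the same credentials as the connection string above (this step is not in the original notebook):
import psycopg

## A sketch: drop the vector store table so ainit_vectorstore_table
## can recreate it. Destructive; all stored embeddings are lost.
with psycopg.connect("postgresql://farras:farras@localhost:6060/farraslang") as conn:
    conn.execute('DROP TABLE IF EXISTS "public"."my_pg_vector_store"')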
In [4]:
## Bind the embedding model to the existing table via PGVectorStore.create
model_embeddings = OllamaEmbeddings(model="nomic-embed-text")
class_vector_store = await PGVectorStore.create(
    engine=pg_engine,
    table_name=TABLE_NAME,
    embedding_service=model_embeddings
)
In [5]:
root_path = "C:/Users/maruf/Documents/LangChainLabAssets"
text_data_store_vector = TextLoader(f'{root_path}/BigDataForTwo.txt')
## Load the file into a list of Document objects
docs = text_data_store_vector.load()
## Split the large text into overlapping chunks
splitter_store_vector = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
## New Document objects after splitting
new_docs = splitter_store_vector.split_documents(docs)
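Note that new_docs is not written to the store anywhere in this notebook; a minimal sketch of that step, using the standard aadd_documents method on the class_vector_store created above, would be:
## A sketch, not part of the original run: persist the split chunks,
## letting the store generate the UUIDs.
chunk_ids = await class_vector_store.aadd_documents(new_docs)
print(f"Stored {len(chunk_ids)} chunks")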
Storing data in the vector store¶
In [6]:
from langchain_core.documents import Document
## Create the documents
contents = ['Bismillah, semoga Allah luluskan aku pada kali ini menjadi Aparatur Sipil Negara',
            'Allah maha menerima doa hambanya, Allah selalu mendeger suara hambanya, Allah maha mengabulkan doa hambanya']
documents = [
    Document(content, metadata={'jenis': f'nasihat ke {angka}', 'author': 'muhammad farras'})
    for angka, content in enumerate(contents)
]
class_vector_store.add_documents(documents)
Out[6]:
['dc07ca72-5a18-4dc9-88f1-6f6efcdbc591', 'a4f5451a-01e7-4ccd-994b-9c7b594604cd']
The call above generates the langchain_id UUIDs automatically, but we can also supply them manually for our own indexing.
In [7]:
from langchain_core.documents import Document
import uuid
contents2 = ['Bismillah, semoga Allah luluskan aku pada kali ini menjadi Aparatur Sipil Negara']
documents = [
    Document(content, metadata={'jenis': 'nasihat ke 3', 'author': 'muhammad farras'})
    for content in contents2
]
class_vector_store.add_documents(documents, ids=[str(uuid.uuid4())])
Out[7]:
['cf3e12bf-41df-4c89-ac35-6356d00517ad']
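To read a document back by its ID, the base VectorStore interface offers get_by_ids. A sketch, assuming PGVectorStore implements it (the original notebook does not exercise this method):
## A sketch: fetch a stored document back by its langchain_id.
fetched = class_vector_store.get_by_ids(['cf3e12bf-41df-4c89-ac35-6356d00517ad'])
print(fetched)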
Deleting data by ID¶
To delete data, call the delete method with its first parameter, ids, set to a list of UUID strings.
In [8]:
class_vector_store.delete(ids=['ea619ca2-5fdd-4678-b882-d48dc1db02fd'])
Out[8]:
True
Searching for similar data¶
In [9]:
docs_for_similarity_search = class_vector_store.similarity_search("visi utama", k=1)
docs_for_similarity_search
Out[9]:
[Document(id='f30472e6-c2e5-5777-8279-da9bf3aee327', metadata={'tanggal': '2026-02-08 10:10:34.207912', 'di': 'rumah 2'}, page_content='saya sedang belajar langchain juga hari ini.')]
In [10]:
docs_for_similarity_search = class_vector_store.similarity_search("jenenge sopo", k=1)
docs_for_similarity_search
Out[10]:
[Document(id='ec2f09a3-f68b-5376-8f30-2a2f163f7398', metadata={'tanggal': '2026-02-08 10:10:34.207796', 'di': 'rst'}, page_content='saya hari ini lagi diruma sakit RST Dompet duafa, saya sendang mengidap penyakit infeksi paru')]
Similarity search with relevance scores¶
In [11]:
docs_for_similarity_search = class_vector_store.similarity_search_with_relevance_scores("Semoga tahun ini adalah tahun terkahir di Vensys", k=1)
## Each result is a (Document, score) tuple; take the top hit's score
docs_for_similarity_search[0][1]
Out[11]:
0.6741104728678319
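Relevance scores like this one are typically used to filter out weak matches. A minimal sketch using the standard as_retriever interface with a score threshold (the 0.5 cutoff is an assumption, not from the original run):
## A sketch: only return documents whose relevance score clears 0.5.
retriever = class_vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5, "k": 1},
)
results = retriever.invoke("Semoga tahun ini adalah tahun terkahir di Vensys")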
In [12]:
docs_for_similarity_search = class_vector_store.similarity_search_with_relevance_scores("Jenenge sopo", k=1)
print(docs_for_similarity_search)
[(Document(id='70fafa1b-7ec4-4e57-ad75-cac910eeda34', metadata={'jenis': 'nasihat ke 1', 'author': 'muhammad farras'}, page_content='Allah maha menerima doa hambanya, Allah selalu mendeger suara hambanya, Allah maha mengabulkan doa hambanya'), 0.4920965986770105)]
Using asimilarity_search¶
In [13]:
query = "I'd like a fruit."
docs = await class_vector_store.asimilarity_search_with_relevance_scores(query, k=1)
print(docs)
[(Document(id='70fafa1b-7ec4-4e57-ad75-cac910eeda34', metadata={'jenis': 'nasihat ke 1', 'author': 'muhammad farras'}, page_content='Allah maha menerima doa hambanya, Allah selalu mendeger suara hambanya, Allah maha mengabulkan doa hambanya'), 0.5015093091748534)]
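The async interface mirrors the sync one for the other search types as well. A sketch of max-marginal-relevance search, assuming PGVectorStore supports MMR as most LangChain vector stores do (not shown in the original run):
## A sketch: MMR trades relevance against diversity among the results.
docs = await class_vector_store.amax_marginal_relevance_search("doa", k=2, fetch_k=10)
print(docs)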