-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathexample.py
More file actions
65 lines (52 loc) · 2.01 KB
/
example.py
File metadata and controls
65 lines (52 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Run:
# ollama pull llama3.2
# ollama pull nomic-embed-text
# ollama serve
import numpy as np
import ollama
from pathlib import Path
from pgvector.psycopg import register_vector
import psycopg
import urllib.request
# Minimal RAG pipeline: load a document into Postgres/pgvector, retrieve the
# chunks nearest the query embedding, and ask a local LLM to answer from them.

# Question answered at the end from the retrieved documentation chunks.
query = 'What index types are supported?'

# Set to False to skip re-downloading / re-chunking / re-embedding the document
# (reuses the existing `chunks` table).
load_data = True

# autocommit so DDL (CREATE EXTENSION / TABLE) and COPY take effect immediately.
conn = psycopg.connect(dbname='pgvector_example', autocommit=True)
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)  # teach psycopg the `vector` type for this connection

if load_data:
    # get data: cache the README next to this script so reruns skip the download
    url = 'https://v-raw-githubusercontent-com.286600.xyz/pgvector/pgvector/refs/heads/master/README.md'
    dest = Path(__file__).parent / 'README.md'
    if not dest.exists():
        urllib.request.urlretrieve(url, dest)

    with open(dest, encoding='utf-8') as f:
        doc = f.read()

    # generate chunks: naive split on second-level markdown headings
    # TODO improve chunking
    # TODO remove markdown
    chunks = doc.split('\n## ')

    # embed chunks
    # nomic-embed-text has task instruction prefix ('search_document: ' for
    # stored passages); renamed from `input` to avoid shadowing the builtin
    doc_inputs = ['search_document: ' + chunk for chunk in chunks]
    embeddings = ollama.embed(model='nomic-embed-text', input=doc_inputs).embeddings

    # create table: vector(768) matches nomic-embed-text's embedding dimension
    conn.execute('DROP TABLE IF EXISTS chunks')
    conn.execute('CREATE TABLE chunks (id bigserial PRIMARY KEY, content text, embedding vector(768))')

    # store chunks via binary COPY (bulk path; faster than per-row INSERTs)
    cur = conn.cursor()
    with cur.copy('COPY chunks (content, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
        copy.set_types(['text', 'vector'])
        for content, embedding in zip(chunks, embeddings):
            copy.write_row([content, embedding])

# embed query
# nomic-embed-text has task instruction prefix ('search_query: ' for queries)
query_input = 'search_query: ' + query
embedding = ollama.embed(model='nomic-embed-text', input=query_input).embeddings[0]

# retrieve chunks: <=> is pgvector's cosine-distance operator, so this returns
# the 5 chunks most similar to the query embedding
result = conn.execute('SELECT content FROM chunks ORDER BY embedding <=> %s LIMIT 5', (np.array(embedding),)).fetchall()
context = '\n\n'.join([row[0] for row in result])

# get answer: stuff the retrieved chunks into the prompt as context
# TODO improve prompt
prompt = f'Answer this question: {query}\n\n{context}'
response = ollama.generate(model='llama3.2', prompt=prompt).response
print(response)