# Getting started

The quickest way to get started is to install vectorlite using python.

```shell
# Note: vectorlite-py not vectorlite. vectorlite is another project.
pip install vectorlite-py apsw numpy
```

Vectorlite's metadata filter feature requires sqlite>=3.38. Python's builtin `sqlite3` module is usually built with old sqlite versions. So `apsw` is used here as the sqlite driver, because it provides bindings to the latest sqlite. Vectorlite still works with old sqlite versions if metadata filter support is not required.

Below is a minimal example of using vectorlite. It can also be found in the [examples folder](https://github.com/1yefuwang1/vectorlite/tree/v0.2.0/examples).

```python
import vectorlite_py
import apsw
import numpy as np
"""
Quick start of using vectorlite extension.
"""

conn = apsw.Connection(':memory:')
conn.enable_load_extension(True) # enable extension loading
conn.load_extension(vectorlite_py.vectorlite_path()) # load vectorlite

cursor = conn.cursor()
# check if vectorlite is loaded
print(cursor.execute('select vectorlite_info()').fetchall())

# Vector distance calculation
for distance_type in ['l2', 'cosine', 'ip']:
    v1 = "[1, 2, 3]"
    v2 = "[4, 5, 6]"
    # Note vector_from_json can be used to convert a JSON string to a vector
    distance = cursor.execute(f'select vector_distance(vector_from_json(?), vector_from_json(?), "{distance_type}")', (v1, v2)).fetchone()
    print(f'{distance_type} distance between {v1} and {v2} is {distance[0]}')

# generate some test data
DIM = 32 # dimension of the vectors
NUM_ELEMENTS = 10000 # number of vectors
data = np.float32(np.random.random((NUM_ELEMENTS, DIM))) # Only float32 vectors are supported by vectorlite for now

# Create a virtual table using vectorlite using l2 distance (default distance type) and default HNSW parameters
cursor.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}], hnsw(max_elements={NUM_ELEMENTS}))')
# Vector distance type can be explicitly set to cosine using:
#
# cursor.execute(f'create virtual table my_table using vectorlite(my_embedding float32[{DIM}] cosine, hnsw(max_elements={NUM_ELEMENTS}))')

# Insert the test data into the virtual table. Note that the rowid MUST be explicitly set when inserting vectors and cannot be auto-generated.
# The rowid is used to uniquely identify a vector and serve as a "foreign key" to relate to the vector's metadata.
# Vectorlite takes vectors in raw bytes, so a numpy vector needs to be converted to bytes before inserting into the table.
cursor.executemany('insert into my_table(rowid, my_embedding) values (?, ?)', [(i, data[i].tobytes()) for i in range(NUM_ELEMENTS)])

# Query the virtual table to get the vector at rowid 1234. Note the vector needs to be converted back to json using vector_to_json() to be human-readable.
result = cursor.execute('select vector_to_json(my_embedding) from my_table where rowid = 1234').fetchone()
print(f'vector at rowid 1234: {result[0]}')

# Find 10 approximate nearest neighbors of data[0] and their distances from data[0].
# knn_search() is used to tell vectorlite to do a vector search.
# knn_param(V, K, ef) is used to pass the query vector V, the number of nearest neighbors K to find and an optional ef parameter to tune the performance of the search.
# If ef is not specified, ef defaults to 10. For more info on ef, please check https://github.com/nmslib/hnswlib/blob/v0.8.0/ALGO_PARAMS.md
result = cursor.execute('select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, 10))', [data[0].tobytes()]).fetchall()
print(f'10 nearest neighbors of row 0 is {result}')

# Find 10 approximate nearest neighbors of the first embedding in vectors with rowid within [1000, 2000) using metadata(rowid) filtering.
rowids = ','.join([str(rowid) for rowid in range(1000, 2000)])
result = cursor.execute(f'select rowid, distance from my_table where knn_search(my_embedding, knn_param(?, 10)) and rowid in ({rowids})', [data[0].tobytes()]).fetchall()
print(f'10 nearest neighbors of row 0 in vectors with rowid within [1000, 2000) is {result}')

conn.close()
```

More examples can be found in the [examples](https://github.com/1yefuwang1/vectorlite/tree/v0.2.0/examples) folder.