Table Management#

Important

This notebook is in the process of being migrated to Vast Data Platform Field Docs. It will probably not run yet.

See also

The Vast DB SDK API Documentation is available here.

The creation of any high-level entity (database, table, column) is a metadata operation. There is no pre-allocation of space for data. Top-level objects in the element store are updated, and any new data associated with them is ready to be linked when written.

VAST DB provides support for the following data types:

image

Install sdk and connect to Vast DB#

Install vastdb library.

!pip install --quiet vastdb
# Change these variables to reflect your environment, e.g.:
#
# ENDPOINT = 'http://your_vast_endpoint:12345'
# DATABASE_NAME = 'your_db'
# ACCESS_KEY = 'your_access_key'
# SECRET_KEY = 'your_secret_key'
# DATABASE_SCHEMA = 'your_database_schema'
#
# This will be created:
# TABLE_NAME='python_sdk_demo'

Connect to Vast DB

import vastdb

# Open the SDK session used by every cell below; the endpoint and both keys
# come from the environment variables configured at the top of the notebook.
session = vastdb.connect(endpoint=ENDPOINT, access=ACCESS_KEY, secret=SECRET_KEY)

Table Management API#

create_table#

  • Usage: Create a new table in a specified schema.

  • Parameters:

    • table_name (str): Name of the table to create.

    • columns (pyarrow.Schema): Arrow Schema object (pyarrow.Schema documentation)

    • fail_if_exists (bool, opt, default=True): If True, fail with an exception if the table already exists; otherwise return the existing table.

    • use_external_row_ids_allocation (bool, opt, default=False): TBC

import pyarrow as pa
from vastdb.errors import TableExists

# Table schemas (not to be confused with the database schema) are described
# with PyArrow (pa).
ARROW_SCHEMA = pa.schema([('column1', pa.int32()), ('column2', pa.string())])


with session.transaction() as tx:
    bucket = tx.bucket(DATABASE_NAME)

    # Bind `schema` before the lookup: if bucket.schema() raises, the
    # `if schema:` check below would otherwise hit a NameError on a fresh
    # kernel (or silently reuse a stale value left by an earlier cell).
    schema = None
    try:
        schema = bucket.schema(name=DATABASE_SCHEMA, fail_if_missing=False)
        print(schema)
    except Exception as e:
        print("Schema doesn't exist:", e)

    # Only attempt the creation when a schema object was actually retrieved.
    if schema:
        try:
            table = schema.create_table(table_name=TABLE_NAME, columns=ARROW_SCHEMA)
            print(f"Table created: {table.name}")
        except TableExists as e:
            # Expected when the notebook is re-run against the same schema.
            print("Couldn't create table because it already exists:", e)
        except Exception as e:
            print("Couldn't create table:", e)

The cell below contains boilerplate code; it is hidden by default.

Hide code cell content
def print_tables(database_name=DATABASE_NAME, schema_name=DATABASE_SCHEMA):
    """Print the name of every table in the given database schema.

    Args:
        database_name: Name of the database (bucket) to open.
        schema_name: Name of the schema whose tables are listed.

    Prints a notice and returns early when the schema lookup yields nothing,
    and prints a notice when the schema contains no tables.
    """
    print(f"Listing tables in: database='{database_name}' schema='{schema_name}'")
    with session.transaction() as tx:
        schema = tx.bucket(database_name).schema(name=schema_name, fail_if_missing=False)
        if not schema:
            print(f">>> Schema {schema_name} not found.")
            return

        # Fetch the listing once so the emptiness check and the loop below
        # work on the same result instead of issuing two separate calls.
        tables = schema.tables()
        if not tables:
            print(">>> No tables found.")
        for table in tables:
            print(f">>> Table: '{table.name}'")
print_tables(database_name=DATABASE_NAME, schema_name=DATABASE_SCHEMA)

tables#

  • Usage: List all tables in a schema.

  • Parameters:

    • table_name (str, optional, default=None): Only return tables matching the exact table_name.

# Look up tables in the schema by exact name and print whatever tables() returns.
with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    # NOTE(review): despite the singular name, `table` holds the return value
    # of tables() — presumably a list of matches; confirm against the SDK docs.
    table = schema.tables(table_name=TABLE_NAME)
    print(table)
# What happens if the table doesn't exist? Run the same lookup with a name
# that is not expected to match any table and print the result.

with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    table = schema.tables(table_name='non-existing-table')
    print(table)
# For comparison let's list all the tables using our utility function
# created earlier in this notebook.

print_tables(database_name=DATABASE_NAME, schema_name=DATABASE_SCHEMA)

get_stats#

  • Usage: Obtain statistics about a specific table.

  • Parameters:

    • No parameters

# NOTE(review): this rebinds TABLE_NAME for the rest of the notebook; the setup
# comments name the demo table 'python_sdk_demo' — confirm 'pythonsdk' is the
# table you want statistics for.
TABLE_NAME = 'pythonsdk'

with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    # fail_if_missing=False is passed so the missing case is handled by the
    # branch below rather than by an exception.
    table = schema.table(name=TABLE_NAME, fail_if_missing=False)
    if table:
        print(f"Getting table stats {table.name}")
        print(table.get_stats())
    else:
        # Report the miss explicitly instead of finishing with no output.
        print(f">>> Table '{TABLE_NAME}' not found.")

drop#

  • Usage: Delete a table.

  • Parameters:

    • No parameters

# Create a table and then drop it.

with session.transaction() as tx:
    bucket = tx.bucket(DATABASE_NAME)

    # Bind `schema` before the lookup: if bucket.schema() raises, the
    # `if schema:` check below would otherwise hit a NameError on a fresh
    # kernel (or silently reuse a stale value left by an earlier cell).
    schema = None
    try:
        schema = bucket.schema(name=DATABASE_SCHEMA, fail_if_missing=False)
        print(schema)
    except Exception as e:
        print("Schema doesn't exist:", e)

    if schema:
        # Built outside the try so the handlers below only cover create_table.
        arrow_schema = pa.schema([('column1', pa.int32()), ('column2', pa.string())])
        try:
            table = schema.create_table(table_name='TEMPORARY_TABLE', columns=arrow_schema)
            print(f"Table created: {table.name}")
        except TableExists as e:
            # Expected when the notebook is re-run without dropping the table.
            print("Couldn't create table because it already exists:", e)
        except Exception as e:
            print("Couldn't create table:", e)
# first let's list the tables
print_tables(database_name=DATABASE_NAME, schema_name=DATABASE_SCHEMA)

Now drop the table.

# Look up the temporary table and drop it if the lookup returns it.
with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    # fail_if_missing=False pairs with the `if table:` guard below: when the
    # lookup yields a falsy result nothing is dropped and nothing is printed.
    table = schema.table(name='TEMPORARY_TABLE', fail_if_missing=False)
    if table:
        print(f"Dropping table {table.name}")
        table.drop()

Verify the table no longer exists.

print_tables(database_name=DATABASE_NAME, schema_name=DATABASE_SCHEMA)