Column Management

Column Management#

Important

This notebook is in the process of being migrated to Vast Data Platform Field Docs. It will probably not run yet.

Install sdk and connect to Vast DB#

Install vastdb library.

!pip install --quiet vastdb

# Change these variables to reflect your environment, e.g.:
#
# ENDPOINT = 'http://your_vast_endpoint:12345'
# DATABASE_NAME = 'your_db'
# ACCESS_KEY = 'your_access_key'
# SECRET_KEY = 'your_secret_key'
# DATABASE_SCHEMA = 'your_database_schema'
#
# This will be created:
# TABLE_NAME='TEMPORARY_TABLE'

Connect to Vast DB

import vastdb

# Open a VAST DB session from the ENDPOINT, ACCESS_KEY and SECRET_KEY variables.
session = vastdb.connect(
    endpoint=ENDPOINT,
    access=ACCESS_KEY,
    secret=SECRET_KEY,
)

Column Management API#

`columns`#

Usage: List all columns of a table.
Parameters:
- No parameters

Create a table for our column management examples.

Create a utility function to print columns.

# NOTE(review): print_columns is not defined in the visible part of this
# notebook; presumably the cell that creates it was lost — confirm before running.
print_columns(DATABASE_NAME, DATABASE_SCHEMA, TABLE_NAME)

`add_column`#

Usage: Add new columns to an existing table.
Parameters:
- new_column (Apache Arrow Schema): Schema of the columns to add.

# Verify the table exists before adding a column to it.
# (print_columns is defined outside the visible code; presumably it prints
# the table's current columns — TODO confirm.)
print_columns(DATABASE_NAME, DATABASE_SCHEMA, TABLE_NAME)

# let's create a utility method to check if a column exists
def column_exists(table, column_name):
    """Return True if ``table`` has a column named ``column_name``.

    Args:
        table: Table handle exposing ``columns()``. A falsy value (for
            example, a table lookup that found nothing) yields ``False``.
        column_name: Name of the column to look up.

    Returns:
        bool: ``True`` when the column lookup succeeds, ``False`` when the
        table is falsy or the lookup raises ``KeyError``.

    Raises:
        Exception: Any error other than a ``KeyError`` from the column
            lookup propagates unchanged to the caller.
    """
    if not table:
        return False

    cols = table.columns()
    try:
        # field(column_name) is a pyarrow method
        # https://arrow.apache.org/docs/python/generated/pyarrow.Schema.html#pyarrow.Schema.field
        # Only this lookup is guarded, so a KeyError means "no such column"
        # and nothing else.
        cols.field(column_name)
    except KeyError:
        return False
    return True

import pyarrow as pa

# Describe the column to add as a one-field Arrow schema.
NEW_COLUMN_NAME = 'new_column'
NEW_COLUMNS = pa.schema([pa.field(NEW_COLUMN_NAME, pa.int64())])

with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    table = schema.table(name=TABLE_NAME, fail_if_missing=False)
    if not table:
        print(f"Couldn't find the table {TABLE_NAME}.")
    else:
        try:
            # Only add the column when it is not there yet, so the cell can be re-run.
            if not column_exists(table, NEW_COLUMN_NAME):
                print(f"Adding column to {table.name}")
                table.add_column(new_column=NEW_COLUMNS)
            else:
                print("Skipping.  Column already exists.")
        except Exception as e:
            # Report the failure instead of aborting the notebook run.
            print("Couldn't add column - verify that it doesn't already exist")
            print(e)

# Show the columns after the change.
print_columns(DATABASE_NAME, DATABASE_SCHEMA, TABLE_NAME)

`rename_column`#

Usage: Rename a column in a table.
Parameters:
- current_column_name (str): The name of the column.
- new_column_name (str, optional): New column name (default is an empty string "").

Alter and rename the Column#

# Rename CUR_COLUMN_NAME to NEW_COLUMN_NAME.
CUR_COLUMN_NAME = 'new_column'
NEW_COLUMN_NAME = 'renamed_new_column'

with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    table = schema.table(name=TABLE_NAME, fail_if_missing=False)
    if table:
        try:
            if column_exists(table, NEW_COLUMN_NAME):
                # The target name is already taken (e.g. the cell was re-run).
                print("Skipping.  Column already exists.")
            elif not column_exists(table, CUR_COLUMN_NAME):
                # Nothing to rename: the source column is not in the table.
                print("Skipping.  Column to rename doesn't exist.")
            else:
                print(f"Renaming column in {table.name}")
                table.rename_column(current_column_name=CUR_COLUMN_NAME, new_column_name=NEW_COLUMN_NAME)
        except Exception as e:
            # Report the failure instead of aborting the notebook run.
            print("Couldn't rename column - verify that the current name exists and the new name is unused")
            print(e)
    else:
        print(f"Couldn't find the table {TABLE_NAME}.")

Validate that the name of the column has changed#

# After a successful rename, 'renamed_new_column' should be listed in place of 'new_column'.
print_columns(DATABASE_NAME, DATABASE_SCHEMA, TABLE_NAME)

`drop_column`#

Usage: Remove columns from a table.
Parameters:
- column_to_drop (Apache Arrow Schema): Schema of the columns to remove.

# Describe the column to drop as a one-field Arrow schema.
COLUMN_NAME = 'renamed_new_column'
COLUMN_TO_DROP = pa.schema([(COLUMN_NAME, pa.int64())])

with session.transaction() as tx:
    schema = tx.bucket(DATABASE_NAME).schema(DATABASE_SCHEMA)
    table = schema.table(name=TABLE_NAME, fail_if_missing=False)
    if table:
        try:
            if not column_exists(table, COLUMN_NAME):
                # Nothing to drop (e.g. the cell was re-run).
                print("Skipping.  Column doesn't exist.")
            else:
                print(f"Dropping column from {table.name}")
                table.drop_column(column_to_drop=COLUMN_TO_DROP)
        except Exception as e:
            # Report the failure instead of aborting the notebook run.
            print("Couldn't drop column - verify that it exists")
            print(e)
    else:
        print(f"Couldn't find the table {TABLE_NAME}.")

Column Management

Contents