Query Catalog via Jupyter and Trino

Query Catalog via Jupyter and Trino

Also see:

import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
%load_ext sql
%sql trino://admin@${DOCKER_HOST_OR_IP}:8443/vast  --connection_arguments '{"http_scheme":"https", "verify": false}'
Connecting to 'trino://admin@10.143.11.241:8443/vast'
%%sql

-- JupySQL throws an error when using `SELECT *` (the `phandle` column triggers it), so list the columns explicitly
    
SELECT
        --phandle, -- phandle causes a jupyter issue
        creation_time,
        uid,
        owner_sid,
        owner_name,
        gid,
        group_owner_sid,
        group_owner_name,
        atime,
        mtime,
        ctime,
        nlinks,
        element_type,
        size,
        used,
        tenant_id,
        name,
        extension,
        parent_path,
        symlink_path,
        major_device,
        minor_device,
        s3_locks_retention,
        nfs_mode_bits,
        name_aces_exist,
        s3_locks_legal_hold,
        user_tags_count,
        user_metadata,
        user_tags,
        login_name,
        search_path,
        metadata_md
FROM 
    "vast-big-catalog-bucket|vast_big_catalog_schema".vast_big_catalog_table
LIMIT 10
Running query in 'trino://admin@10.143.11.241:8443/vast'
creation_time uid owner_sid owner_name gid group_owner_sid group_owner_name atime mtime ctime nlinks element_type size used tenant_id name extension parent_path symlink_path major_device minor_device s3_locks_retention nfs_mode_bits name_aces_exist s3_locks_legal_hold user_tags_count user_metadata user_tags login_name search_path metadata_md
2024-11-26 02:20:59.093006 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:20:59.101818 2024-11-26 02:20:59.101818 2024-11-26 02:20:59.101818 1 FILE 4096 4096 -1 file_61860.bin bin /james/noatime-audit/1million/se-var-cb2-c1.var.vastdata.com/subfolder_1/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb2-c1.var.vastdata.com/subfolder_1/ None
2024-11-26 02:24:35.928771 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:24:35.931208 2024-11-26 02:24:35.931208 2024-11-26 02:24:35.931208 1 FILE 4096 4096 -1 file_498305.bin bin /james/noatime-audit/1million/se-var-cb3-c2/subfolder_11/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb3-c2/subfolder_11/ None
2024-11-26 02:21:44.068628 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:21:40.621274 2024-11-26 02:21:40.621274 2024-11-26 02:21:40.621274 1 FILE 4096 4096 -1 file_318773.bin bin /james/noatime-audit/1million/se-var-cb1-c4/subfolder_7/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb1-c4/subfolder_7/ None
2024-11-26 02:22:07.256043 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:22:07.258294 2024-11-26 02:22:07.258294 2024-11-26 02:22:07.258294 1 FILE 4096 4096 -1 file_617817.bin bin /james/noatime-audit/1million/se-var-cb1-c4/subfolder_14/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb1-c4/subfolder_14/ None
2024-11-26 02:19:37.657847 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:19:54.198153 2024-11-26 02:19:54.198153 2024-11-26 02:19:54.198153 1 FILE 4096 4096 -1 file_11395.bin bin /james/noatime-audit/1million/se-var-cb2-c4.var.vastdata.com/subfolder_0/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb2-c4.var.vastdata.com/subfolder_0/ None
2024-11-26 02:20:10.874791 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:20:10.879329 2024-11-26 02:20:10.879329 2024-11-26 02:20:10.879329 1 FILE 4096 4096 -1 file_971869.bin bin /james/noatime-audit/1million/se-var-cb2-c3.var.vastdata.com/subfolder_23/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb2-c3.var.vastdata.com/subfolder_23/ None
2024-11-26 02:21:00.733078 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:21:07.871070 2024-11-26 02:21:07.871070 2024-11-26 02:21:07.871070 1 FILE 4096 4096 -1 file_984771.bin bin /james/noatime-audit/1million/se-var-cb1-c4/subfolder_23/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb1-c4/subfolder_23/ None
2024-11-26 02:23:36.208950 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:23:52.751522 2024-11-26 02:23:52.751522 2024-11-26 02:23:52.751522 1 FILE 4096 4096 -1 file_538056.bin bin /james/noatime-audit/1million/se-var-cb2-c1.var.vastdata.com/subfolder_12/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb2-c1.var.vastdata.com/subfolder_12/ None
2024-11-26 02:22:56.082591 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:23:08.950434 2024-11-26 02:23:08.950434 2024-11-26 02:23:08.950434 1 FILE 4096 4096 -1 file_150905.bin bin /james/noatime-audit/1million/se-var-cb2-c4.var.vastdata.com/subfolder_3/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb2-c4.var.vastdata.com/subfolder_3/ None
2024-11-26 02:20:10.360193 1000 S-1-5-21-2499193476-3784937457-2917841277-1114 vastdata 1001 None None 2024-11-26 02:20:30.365206 2024-11-26 02:20:30.365206 2024-11-26 02:20:30.365206 1 FILE 4096 4096 -1 file_561154.bin bin /james/noatime-audit/1million/se-var-cb1-c4/subfolder_13/ None None None None 436 False None None None None vastdata@selab.vastdata.com /james/noatime-audit/1million/se-var-cb1-c4/subfolder_13/ None
Truncated to displaylimit of 10.

Using a DataFrame

Sometimes we just need a pandas DataFrame.

%config SqlMagic.autopandas = True
df = %sql SELECT * FROM "vast-big-catalog-bucket|vast_big_catalog_schema".vast_big_catalog_table LIMIT 100
df
Running query in 'trino://admin@10.143.11.241:8443/vast'
phandle creation_time uid owner_sid owner_name gid group_owner_sid group_owner_name atime mtime ... s3_locks_retention nfs_mode_bits name_aces_exist s3_locks_legal_hold user_tags_count user_metadata user_tags login_name search_path metadata_md
0 (clone_id: 0, handle_id: 108889548186062326) 2024-11-12 18:38:28.290402 55572 S-1-111-734251340-2489201226-3423371408-172406... robM-demo-flow 5700 None None 2023-03-17 23:32:27.614930 2015-05-19 06:07:11.121846 ... None 420 False None None None None robM-demo-flow /scratch/home/robM-demo-flow/securing/ None
1 (clone_id: 0, handle_id: 3187652000005706689) 2024-11-12 18:38:28.103125 55572 S-1-111-734251340-2489201226-3423371408-172406... robM-demo-flow 5362 None None 2021-03-13 11:19:15.126292 2020-05-22 04:40:56.213391 ... None 420 False None None None None robM-demo-flow /scratch/home/robM-demo-flow/securing/ None
2 (clone_id: 0, handle_id: 8317808345859054910) 2024-11-12 18:38:28.211323 55572 S-1-111-734251340-2489201226-3423371408-172406... robM-demo-flow 5662 None None 2023-10-05 21:29:17.107818 2024-04-14 22:36:38.611245 ... None 420 False None None None None robM-demo-flow /scratch/home/robM-demo-flow/securing/ None
3 (clone_id: 0, handle_id: -2883570053398192133) 2024-11-12 18:38:28.842374 55572 S-1-111-734251340-2489201226-3423371408-172406... robM-demo-flow 5479 None None 2021-07-14 14:34:08.153681 2019-08-04 11:29:09.654093 ... None 420 False None None None None robM-demo-flow /scratch/home/robM-demo-flow/squamosoparietal/... None
4 (clone_id: 0, handle_id: 943058224590348322) 2024-11-12 18:38:22.536139 55572 S-1-111-734251340-2489201226-3423371408-172406... robM-demo-flow 5400 None None 2021-07-16 15:26:27.931373 2015-01-15 14:08:54.340687 ... None 420 False None None None None robM-demo-flow /scratch/home/robM-demo-flow/securing/drupes/ None
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 (clone_id: 0, handle_id: -3820502312683985021) 2024-11-12 18:43:26.495323 105689 S-1-111-734251340-2489201226-3423371408-172406... wcash-flow 5694 None None 2020-10-03 07:32:41.208146 2015-07-16 06:45:17.654957 ... None 420 False None None None None wcash-flow /scratch/home/wcash-flow/Jubbulpore/ None
96 (clone_id: 0, handle_id: -3810646396870322438) 2024-11-12 18:43:22.445472 105689 S-1-111-734251340-2489201226-3423371408-172406... wcash-flow 5275 None None 2023-07-17 22:07:10.497768 2021-12-07 19:36:33.050568 ... None 420 False None None None None wcash-flow /scratch/home/wcash-flow/Jubbulpore/Chaetochloa/ None
97 (clone_id: 0, handle_id: -3693598491956211871) 2024-11-12 18:43:12.211471 105689 S-1-111-734251340-2489201226-3423371408-172406... wcash-flow 5073 None None 2024-05-02 15:33:08.367469 2019-08-24 06:13:16.805246 ... None 420 False None None None None wcash-flow /scratch/home/wcash-flow/bowls/insoul/ None
98 (clone_id: 0, handle_id: -3473889008656688273) 2024-11-12 18:42:58.764932 105689 S-1-111-734251340-2489201226-3423371408-172406... wcash-flow 5812 None None 2021-11-02 12:57:45.119111 2020-01-19 20:01:43.659098 ... None 420 False None None None None wcash-flow /scratch/home/wcash-flow/Reger/uncircumscription/ None
99 (clone_id: 0, handle_id: -3373353469198921036) 2024-11-12 18:42:58.638164 105689 S-1-111-734251340-2489201226-3423371408-172406... wcash-flow 5958 None None 2020-04-22 21:07:53.187238 2016-02-28 17:24:11.172899 ... None 420 False None None None None wcash-flow /scratch/home/wcash-flow/Reger/uncircumscription/ None

100 rows × 32 columns