GPandas provides seamless integration with SQL databases and Google BigQuery, allowing you to query data directly into DataFrames.

 

Overview

GPandas supports two main data sources for SQL integration:

FunctionData SourceUse Case
Read_sql()SQL DatabasesSQL Server, PostgreSQL, and other database/sql compatible databases
From_gbq()Google BigQueryLarge-scale analytics on BigQuery tables

 

Architecture

flowchart LR
    subgraph Databases["Data Sources"]
        MSSQL[(SQL Server)]
        PG[(PostgreSQL)]
        GBQ[(BigQuery)]
    end
    
    subgraph GPandas["GPandas"]
        CONFIG[DbConfig]
        RSQL[Read_sql]
        FGBQ[From_gbq]
    end
    
    subgraph Output["Output"]
        DF[DataFrame]
        SER[Series Columns]
    end
    
    MSSQL --> CONFIG
    PG --> CONFIG
    CONFIG --> RSQL
    GBQ --> FGBQ
    RSQL --> DF
    FGBQ --> DF
    DF --> SER
    
    style Databases fill:#1e293b,stroke:#f59e0b,stroke-width:2px
    style GPandas fill:#1e293b,stroke:#3b82f6,stroke-width:2px
    style Output fill:#1e293b,stroke:#22c55e,stroke-width:2px

 


 

Read_sql() - SQL Databases

The Read_sql() function executes a SQL query against a relational database and returns results as a DataFrame.

 

Function Signature

func (GoPandas) Read_sql(query string, db_config DbConfig) (*dataframe.DataFrame, error)

 

DbConfig Structure

Configure your database connection using the DbConfig struct:

type DbConfig struct {
    Database_server string  // Database type: "sqlserver" or "postgres"
    Server          string  // Hostname or IP address
    Port            string  // Port number
    Database        string  // Database name
    Username        string  // Database user
    Password        string  // Database password
}

 

Parameters

ParameterTypeDescription
querystringSQL query to execute
db_configDbConfigDatabase connection configuration

 

Returns

TypeDescription
*dataframe.DataFrameQuery results as a DataFrame
errorError if connection or query fails

 

SQL Server Example

package main

import (
    "fmt"
    "log"
    "os"

    "github.com/apoplexi24/gpandas"
)

func main() {
    gp := gpandas.GoPandas{}

    // Configure connection (prefer environment variables)
    config := gpandas.DbConfig{
        Database_server: "sqlserver",
        Server:          os.Getenv("DB_SERVER"),
        Port:            "1433",
        Database:        os.Getenv("DB_NAME"),
        Username:        os.Getenv("DB_USER"),
        Password:        os.Getenv("DB_PASSWORD"),
    }

    // Execute query
    query := `
        SELECT 
            employee_id,
            first_name,
            last_name,
            department,
            salary
        FROM employees
        WHERE department = 'Engineering'
        ORDER BY salary DESC
    `

    df, err := gp.Read_sql(query, config)
    if err != nil {
        log.Fatalf("Query failed: %v", err)
    }

    fmt.Println("Engineering Team:")
    fmt.Println(df.String())
}

 

PostgreSQL Example

package main

import (
    "fmt"
    "log"
    "os"

    "github.com/apoplexi24/gpandas"
)

func main() {
    gp := gpandas.GoPandas{}

    config := gpandas.DbConfig{
        Database_server: "postgres",  // Use "postgres" for PostgreSQL
        Server:          os.Getenv("PG_HOST"),
        Port:            "5432",
        Database:        os.Getenv("PG_DATABASE"),
        Username:        os.Getenv("PG_USER"),
        Password:        os.Getenv("PG_PASSWORD"),
    }

    query := `
        SELECT 
            product_name,
            category,
            price,
            stock_quantity
        FROM products
        WHERE price > 50.00
        LIMIT 100
    `

    df, err := gp.Read_sql(query, config)
    if err != nil {
        log.Fatalf("Query failed: %v", err)
    }

    fmt.Println(df.String())
}

 

Connection String Formats

GPandas builds connection strings based on the Database_server value:

DatabaseFormat
SQL Serverserver=X;user id=X;password=X;port=X;database=X
PostgreSQLhost=X port=X user=X password=X dbname=X sslmode=disable

 


 

From_gbq() - Google BigQuery

The From_gbq() function queries Google BigQuery and returns results as a DataFrame.

 

Function Signature

func (GoPandas) From_gbq(query string, projectID string) (*dataframe.DataFrame, error)

 

Parameters

ParameterTypeDescription
querystringBigQuery SQL query
projectIDstringGoogle Cloud Project ID

 

Returns

TypeDescription
*dataframe.DataFrameQuery results as a DataFrame
errorError if query execution fails

 

Prerequisites

Before using From_gbq(), ensure you have:

  1. Google Cloud credentials configured
  2. BigQuery API enabled in your project
  3. Appropriate IAM permissions (BigQuery Data Viewer, BigQuery Job User)

 

Setting Up Credentials

Set the GOOGLE_APPLICATION_CREDENTIALS environment variable:

export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"

Or use Application Default Credentials:

gcloud auth application-default login

 

BigQuery Example

package main

import (
    "fmt"
    "log"

    "github.com/apoplexi24/gpandas"
)

func main() {
    gp := gpandas.GoPandas{}

    projectID := "my-gcp-project"
    
    query := `
        SELECT 
            date,
            product_category,
            SUM(revenue) as total_revenue,
            COUNT(*) as transaction_count
        FROM ` + "`my-gcp-project.sales.transactions`" + `
        WHERE date >= '2024-01-01'
        GROUP BY date, product_category
        ORDER BY date DESC, total_revenue DESC
        LIMIT 1000
    `

    df, err := gp.From_gbq(query, projectID)
    if err != nil {
        log.Fatalf("BigQuery failed: %v", err)
    }

    fmt.Println("Sales Summary:")
    fmt.Println(df.String())

    // Export results locally
    _, err = df.ToCSV("sales_summary.csv", ",")
    if err != nil {
        log.Printf("Export warning: %v", err)
    }
}

 

BigQuery Public Datasets Example

Query public datasets without requiring your own data:

package main

import (
    "fmt"
    "log"

    "github.com/apoplexi24/gpandas"
)

func main() {
    gp := gpandas.GoPandas{}

    projectID := "my-gcp-project"
    
    // Query a public dataset
    query := `
        SELECT 
            name,
            number,
            year
        FROM ` + "`bigquery-public-data.usa_names.usa_1910_current`" + `
        WHERE year = 2020
        ORDER BY number DESC
        LIMIT 10
    `

    df, err := gp.From_gbq(query, projectID)
    if err != nil {
        log.Fatalf("Query failed: %v", err)
    }

    fmt.Println("Top 10 Baby Names in 2020:")
    fmt.Println(df.String())
}

 


 

Data Flow

Both functions follow a similar pattern for converting query results to DataFrames:

sequenceDiagram
    participant App as Application
    participant GP as GPandas
    participant DB as Database
    participant DF as DataFrame

    App->>GP: Call Read_sql() or From_gbq()
    GP->>DB: Execute Query
    DB-->>GP: Return Result Set
    
    loop For Each Column
        GP->>GP: Extract Column Values
        GP->>GP: Create Series
    end
    
    GP->>DF: Build DataFrame
    DF-->>App: Return DataFrame

 

Type Mapping

Query result types are mapped to Go types:

SQL TypeBigQuery TypeGo Type
INT, INTEGERINT64int64
FLOAT, REALFLOAT64float64
VARCHAR, TEXTSTRINGstring
BIT, BOOLEANBOOLbool
DATE, DATETIMEDATE, TIMESTAMPVaries
NULLNULLnil

 

Error Handling

SQL Database Errors

df, err := gp.Read_sql(query, config)
if err != nil {
    // Common error patterns
    switch {
    case strings.Contains(err.Error(), "connection"):
        log.Fatal("Database connection failed - check server/credentials")
    case strings.Contains(err.Error(), "query execution"):
        log.Fatal("Query syntax error or permission issue")
    case strings.Contains(err.Error(), "scanning"):
        log.Fatal("Data type conversion error")
    default:
        log.Fatalf("Database error: %v", err)
    }
}

 

BigQuery Errors

df, err := gp.From_gbq(query, projectID)
if err != nil {
    switch {
    case strings.Contains(err.Error(), "NewClient"):
        log.Fatal("BigQuery client creation failed - check credentials")
    case strings.Contains(err.Error(), "query.Read"):
        log.Fatal("Query execution failed - check syntax and permissions")
    case strings.Contains(err.Error(), "no rows"):
        log.Fatal("Query returned no results")
    default:
        log.Fatalf("BigQuery error: %v", err)
    }
}

 

Best Practices

PracticeDescription
Use environment variablesNever hardcode credentials in source code
Limit result setsUse LIMIT clause to avoid memory issues
Use parameterized queriesPrevent SQL injection (handle at application level)
Index key columnsEnsure database tables are properly indexed
Close connectionsGPandas handles this automatically with defer

 

Security Considerations

Credential Management

// Good: Use environment variables
config := gpandas.DbConfig{
    Database_server: "sqlserver",
    Server:          os.Getenv("DB_SERVER"),
    Port:            os.Getenv("DB_PORT"),
    Database:        os.Getenv("DB_NAME"),
    Username:        os.Getenv("DB_USER"),
    Password:        os.Getenv("DB_PASSWORD"),
}

// Bad: Hardcoded credentials
config := gpandas.DbConfig{
    Password: "my-secret-password",  // Never do this!
}

 

Query Safety

// Be careful with dynamic queries
// Always validate user input before including in queries
func queryByDepartment(gp gpandas.GoPandas, config gpandas.DbConfig, dept string) (*dataframe.DataFrame, error) {
    // Validate input
    validDepts := map[string]bool{
        "Engineering": true,
        "Sales":       true,
        "Marketing":   true,
    }
    
    if !validDepts[dept] {
        return nil, fmt.Errorf("invalid department: %s", dept)
    }
    
    query := fmt.Sprintf(`
        SELECT * FROM employees 
        WHERE department = '%s'
    `, dept)
    
    return gp.Read_sql(query, config)
}

 

Complete Example: ETL Pipeline

package main

import (
    "fmt"
    "log"
    "os"

    "github.com/apoplexi24/gpandas"
)

func main() {
    gp := gpandas.GoPandas{}

    // Step 1: Extract from SQL Server
    sqlConfig := gpandas.DbConfig{
        Database_server: "sqlserver",
        Server:          os.Getenv("SQL_SERVER"),
        Port:            "1433",
        Database:        os.Getenv("SQL_DATABASE"),
        Username:        os.Getenv("SQL_USER"),
        Password:        os.Getenv("SQL_PASSWORD"),
    }

    ordersDF, err := gp.Read_sql(`
        SELECT order_id, customer_id, product_id, quantity, order_date
        FROM orders
        WHERE order_date >= '2024-01-01'
    `, sqlConfig)
    if err != nil {
        log.Fatalf("Failed to fetch orders: %v", err)
    }
    fmt.Println("Orders loaded:", ordersDF.String())

    // Step 2: Extract from BigQuery
    analyticsDF, err := gp.From_gbq(`
        SELECT product_id, category, avg_rating
        FROM analytics.product_metrics
        WHERE avg_rating >= 4.0
    `, "my-project")
    if err != nil {
        log.Fatalf("Failed to fetch analytics: %v", err)
    }
    fmt.Println("Analytics loaded:", analyticsDF.String())

    // Step 3: Export combined data
    _, err = ordersDF.ToCSV("exports/orders_2024.csv", ",")
    if err != nil {
        log.Printf("Export warning: %v", err)
    }

    fmt.Println("ETL pipeline complete!")
}

 

See Also