skip to content

Configure BabelDOC as Translator

5 min read

I found a very interesting and useful open-source project: BabelDOC.

Install BabelDOC

First install uv:

pip install uv

Then we can install BabelDOC:

uv tool install --python 3.12 BabelDOC

Configure DeepSeek API

On macOS:

echo 'export DEEPSEEK_API_KEY="My DeepSeek API Key"' >> ~/.zshrc
echo 'export DEEPSEEK_BASE_URL="https://api.deepseek.com/v1"' >> ~/.zshrc
echo 'export DEEPSEEK_MODEL="deepseek-chat"' >> ~/.zshrc
source ~/.zshrc

On Windows:

setx DEEPSEEK_API_KEY "My DeepSeek API Key"
setx DEEPSEEK_BASE_URL "https://api.deepseek.com"
setx DEEPSEEK_MODEL "deepseek-chat"

Restart PowerShell for the new environment variables to take effect.

Create a Command Line Function to Make Translation Easier

On macOS:

Paste the following function into ~/.zshrc:

# bd - translate a PDF with BabelDOC through the DeepSeek (OpenAI-compatible) API.
#
# Usage:
#   bd <pdf_path> [--full] [--out dir] [--lang-in en] [--lang-out zh]
#
# Options:
#   --out dir       Output directory (default: "translated" next to the PDF).
#   --lang-in xx    Source language code (default: en).
#   --lang-out xx   Target language code (default: zh).
#   --full          Produce the monolingual PDF as well (default: bilingual only).
#
# Environment (read):
#   DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL, DEEPSEEK_MODEL
#
# Outputs:
#   Progress, BabelDOC's captured output, the parsed token usage and the
#   DeepSeek balance on stdout; usage and error messages on stderr.
#
# Returns:
#   0 on success, 1 on usage/configuration errors, 127 when babeldoc is not
#   installed, otherwise babeldoc's own non-zero exit status.
#
# Written to work in both zsh and bash; all variables are local so nothing
# leaks into the interactive shell that sourced this function.
bd() {
    local usage="Usage: bd <pdf_path> [--full] [--out dir] [--lang-in en] [--lang-out zh]"
    local file="" outdir="" lang_in="en" lang_out="zh"
    local full_mode=0     # 0 = bilingual only, 1 = mono + bilingual
    local output="" rc=0
    local tokens_line="" tokens=""
    local balance_json="" total_balance="" currency=""
    local -a extra_args
    extra_args=()

    # -----------------------
    # Argument parsing
    # -----------------------
    while [ $# -gt 0 ]; do
        case "$1" in
            --out|--lang-in|--lang-out)
                # Without this guard `shift 2` fails when the value is
                # missing, nothing is consumed and the loop never ends.
                if [ $# -lt 2 ]; then
                    echo "Option $1 requires a value" >&2
                    echo "$usage" >&2
                    return 1
                fi
                case "$1" in
                    --out)      outdir="$2" ;;
                    --lang-in)  lang_in="$2" ;;
                    --lang-out) lang_out="$2" ;;
                esac
                shift 2
                ;;
            --full)
                full_mode=1
                shift
                ;;
            -*)
                echo "Unknown option: $1" >&2
                echo "$usage" >&2
                return 1
                ;;
            *)
                # If several paths are given, the last one wins.
                file="$1"
                shift
                ;;
        esac
    done

    if [ -z "$file" ]; then
        echo "$usage" >&2
        return 1
    fi

    # -----------------------
    # Pre-flight checks (fail before creating directories or calling the API)
    # -----------------------
    if [ ! -f "$file" ]; then
        echo "Input file not found: $file" >&2
        return 1
    fi

    if [ -z "${DEEPSEEK_API_KEY:-}" ] || [ -z "${DEEPSEEK_BASE_URL:-}" ] || [ -z "${DEEPSEEK_MODEL:-}" ]; then
        echo "DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL and DEEPSEEK_MODEL must be set." >&2
        return 1
    fi

    if ! command -v babeldoc >/dev/null 2>&1; then
        echo "babeldoc not found in PATH (install it with: uv tool install --python 3.12 BabelDOC)" >&2
        return 127
    fi

    if [ -z "$outdir" ]; then
        outdir="$(dirname "$file")/translated"
    fi

    mkdir -p "$outdir" || return 1

    echo "=========================="
    echo "Input file: $file"
    echo "Output directory: $outdir"
    echo "Language: $lang_in → $lang_out"
    if [ "$full_mode" -eq 1 ]; then
        echo "Output mode: bilingual + monolingual"
    else
        echo "Output mode: bilingual only"
    fi
    echo "=========================="

    # -----------------------
    # Build BabelDOC arguments
    # -----------------------
    if [ "$full_mode" -eq 0 ]; then
        extra_args+=("--no-mono")   # only dual-language PDF
    fi

    # -----------------------
    # Run BabelDOC and capture output (stdout + stderr) for later parsing
    # -----------------------
    echo ""
    echo "▶ Running BabelDOC..."
    output=$(
      babeldoc \
        --openai \
        --openai-model "$DEEPSEEK_MODEL" \
        --openai-base-url "$DEEPSEEK_BASE_URL" \
        --openai-api-key "$DEEPSEEK_API_KEY" \
        --files "$file" \
        --lang-in "$lang_in" \
        --lang-out "$lang_out" \
        --output "$outdir" \
        "${extra_args[@]}" \
        2>&1
    )
    rc=$?

    # Show BabelDOC output to user
    printf '%s\n' "$output"

    # Do not report success (or spend a balance request) on a failed run.
    if [ "$rc" -ne 0 ]; then
        echo "" >&2
        echo "BabelDOC failed with exit status $rc." >&2
        return "$rc"
    fi

    echo ""
    echo "=========================="
    echo " Translation completed. Checking token usage and balance..."
    echo "=========================="

    # -----------------------
    # Parse token usage from BabelDOC output
    # -----------------------
    echo ""
    echo "📘 Parsing BabelDOC output for token usage..."
    tokens_line=$(printf '%s\n' "$output" | grep -E "Total tokens" | tail -n 1)

    if [ -n "$tokens_line" ]; then
        # Take the first number after the "Total tokens" label. `-n` + `p`
        # makes sed print nothing when the line has no number, so the
        # failure branch below is actually reachable.
        tokens=$(printf '%s\n' "$tokens_line" | sed -n 's/.*Total tokens[^0-9]*\([0-9][0-9,]*\).*/\1/p')
        if [ -n "$tokens" ]; then
            echo "🔢 Token usage for this translation: $tokens"
        else
            echo "⚠️ Found a 'Total tokens' line, but failed to parse the number:"
            echo "    $tokens_line"
        fi
    else
        echo "⚠️ No 'Total tokens' line found in BabelDOC output."
    fi

    # -----------------------
    # Fetch DeepSeek balance (endpoint: /user/balance)
    # -----------------------
    echo ""
    echo "📡 Fetching DeepSeek API balance..."
    # --max-time keeps a stalled connection from hanging the shell.
    balance_json=$(curl -s --max-time 15 -H "Authorization: Bearer $DEEPSEEK_API_KEY" https://api.deepseek.com/user/balance)

    if [[ "$balance_json" == *"balance_infos"* ]]; then
        # Lightweight sed extraction (no jq dependency). Whitespace around
        # the colon is tolerated. Because `.*` is greedy, the LAST
        # total_balance/currency pair on a line is the one reported.
        total_balance=$(printf '%s\n' "$balance_json" | sed -n 's/.*"total_balance"[ ]*:[ ]*"\([^"]*\)".*/\1/p' | head -n 1)
        currency=$(printf '%s\n' "$balance_json" | sed -n 's/.*"currency"[ ]*:[ ]*"\([^"]*\)".*/\1/p' | head -n 1)
        if [ -n "$total_balance" ]; then
            echo "💰 DeepSeek balance: $total_balance $currency"
        else
            echo "⚠️ Balance API returned JSON, but parsing failed:"
            echo "    $balance_json"
        fi
    elif [ -n "$balance_json" ]; then
        echo "⚠️ Unexpected balance response:"
        echo "    $balance_json"
    else
        echo "⚠️ Failed to retrieve balance (empty response)."
    fi

    echo ""
    echo "🎉 Done! Translated files saved to: $outdir"
    return 0
}
 
image-20251205203954771

On Windows:

Create bd.ps1:

notepad $HOME\bd.ps1

Write:

<#
.SYNOPSIS
    Translate a PDF with BabelDOC through the DeepSeek (OpenAI-compatible) API.

.DESCRIPTION
    Runs the babeldoc command line tool on the given PDF, streams its output
    to the console and to babeldoc_run.log in the output directory, then
    reports the token usage parsed from that output and the current DeepSeek
    account balance.

    Reads DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL and DEEPSEEK_MODEL from the
    environment.

.PARAMETER File
    Path of the PDF to translate.

.PARAMETER Out
    Output directory. Defaults to a "translated" folder next to the PDF.

.PARAMETER LangIn
    Source language code (default: en).

.PARAMETER LangOut
    Target language code (default: zh).

.PARAMETER Full
    Also produce the monolingual PDF (default: bilingual only).
#>
param(
    [Parameter(Mandatory=$true)]
    [string]$File,

    [string]$Out = "",
    [string]$LangIn = "en",
    [string]$LangOut = "zh",

    [switch]$Full
)

# ---------------------
# Pre-flight checks
# ---------------------
if (-not (Test-Path -LiteralPath $File -PathType Leaf)) {
    Write-Host "Input file not found: $File"
    exit 1
}
# Use the absolute path: Split-Path returns an empty string for a bare file
# name such as "paper.pdf", which would make Join-Path below throw.
$File = (Resolve-Path -LiteralPath $File).ProviderPath

foreach ($name in 'DEEPSEEK_API_KEY', 'DEEPSEEK_BASE_URL', 'DEEPSEEK_MODEL') {
    if (-not [Environment]::GetEnvironmentVariable($name)) {
        Write-Host "Environment variable $name is not set."
        exit 1
    }
}

if (-not (Get-Command babeldoc -ErrorAction SilentlyContinue)) {
    Write-Host "babeldoc not found in PATH (install it with: uv tool install --python 3.12 BabelDOC)"
    exit 1
}

# ---------------------
# Determine output directory
# ---------------------
if ($Out -eq "") {
    $Out = Join-Path (Split-Path $File -Parent) "translated"
}
New-Item -ItemType Directory -Path $Out -Force | Out-Null

Write-Host "=========================="
Write-Host "Input file: $File"
Write-Host "Output directory: $Out"
Write-Host "Language: $LangIn → $LangOut"
if ($Full) {
    Write-Host "Output mode: bilingual + monolingual"
} else {
    Write-Host "Output mode: bilingual only"
}
Write-Host "=========================="

# ---------------------
# Build BabelDOC args
# ---------------------
$ExtraArgs = @()
if (-not $Full) {
    $ExtraArgs += "--no-mono"   # only dual-language PDF
}

$BabeldocArgs = @(
    "--openai",
    "--openai-model", $env:DEEPSEEK_MODEL,
    "--openai-base-url", $env:DEEPSEEK_BASE_URL,
    "--openai-api-key", $env:DEEPSEEK_API_KEY,
    "--files", $File,
    "--lang-in", $LangIn,
    "--lang-out", $LangOut,
    "--output", $Out
) + $ExtraArgs

# ---------------------
# Run BabelDOC with live output, also save to log
# ---------------------
Write-Host ""
Write-Host "Running BabelDOC..."

$logFile = Join-Path $Out "babeldoc_run.log"

# Each record is converted to a plain string (stderr arrives as ErrorRecord
# objects), written to the log, kept in $Output for parsing, and still passed
# down the pipeline so it shows up live in the console.
& babeldoc @BabeldocArgs 2>&1 |
    ForEach-Object { "$_" } |
    Tee-Object -FilePath $logFile |
    Tee-Object -Variable Output
$ExitCode = $LASTEXITCODE

if ($ExitCode -ne 0) {
    Write-Host "`nBabelDOC failed with exit code $ExitCode. See log: $logFile"
    exit $ExitCode
}

Write-Host "`n=========================="
Write-Host " Translation completed. Checking token usage & balance..."
Write-Host "=========================="

# ---------------------
# Parse token usage
# ---------------------
Write-Host "`n📘 Parsing token usage..."

# Use the last matching line and take the first number after the label.
$TokenLine = $Output | Select-String -Pattern "Total tokens" | Select-Object -Last 1
if ($TokenLine -and ($TokenLine.Line -match 'Total tokens\D*(\d[\d,]*)')) {
    $Token = $Matches[1]
    Write-Host "🔢 Token usage: $Token"
} elseif ($TokenLine) {
    Write-Host "⚠️ Found a 'Total tokens' line, but failed to parse the number:"
    Write-Host "    $($TokenLine.Line)"
} else {
    Write-Host "⚠️ No token info found."
}

# ---------------------
# Fetch DeepSeek balance
# ---------------------
Write-Host "`n📡 Fetching DeepSeek API balance..."

# try/catch with -ErrorAction Stop catches HTTP and network failures alike, so
# a failed request yields the warning below instead of a raw error record.
$BalanceJson = $null
try {
    $BalanceJson = Invoke-RestMethod -Headers @{Authorization = "Bearer $env:DEEPSEEK_API_KEY"} `
        -Uri "https://api.deepseek.com/user/balance" `
        -Method GET `
        -ErrorAction Stop
} catch {
    Write-Host "    $($_.Exception.Message)"
}

if ($BalanceJson -and $BalanceJson.balance_infos) {
    # Report the first balance entry returned by the API.
    $Balance = $BalanceJson.balance_infos[0].total_balance
    $Currency = $BalanceJson.balance_infos[0].currency
    Write-Host "💰 DeepSeek Balance: $Balance $Currency"
} else {
    Write-Host "⚠️ Failed to fetch balance."
}

Write-Host "`n🎉 Done! Files saved to $Out"
 

Allow PowerShell to run the .ps1 script:

Set-ExecutionPolicy -Scope CurrentUser RemoteSigned

Use bd anywhere:

notepad $PROFILE

Add bd to profile:

# Forward every argument to the bd.ps1 script in the home directory, so that
# `bd` can be invoked from any working directory.
function bd {
    $scriptPath = Join-Path $HOME 'bd.ps1'
    & $scriptPath @args
}

Then:

. $PROFILE

Simplest (default: en → zh, default output directory)

bd paper.pdf

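Specify output directory (the `--` options shown in these examples are for the macOS zsh function; the Windows script takes PowerShell parameters instead: -Out, -LangIn, -LangOut and -Full)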

bd paper.pdf --out ./translated

Output both the monolingual and the bilingual (dual-language) PDF

bd paper.pdf --full

Specify languages

bd paper.pdf --lang-in en --lang-out ja