> ## Documentation Index
> Fetch the complete documentation index at: https://inference-docs.cerebras.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Gemma 4 31B

> This model excels at multimodal reasoning across screenshots, documents, diagrams, and design assets. Ideal for visual agentic workflows, image-aware copilots, and teams migrating from closed multimodal APIs to an open model.

export const ModelInfo = ({modelId, modelCardUrl, playgroundUrl, contextLength = {}, maxOutput = {}, speed, inputOutput = {}, pricing = {}, rateLimits = [], endpoints = [], features = [], knownLimitations = []}) => {
  const [copied, setCopied] = React.useState(false);
  const handleCopy = () => {
    navigator.clipboard.writeText(modelId);
    setCopied(true);
    setTimeout(() => setCopied(false), 2000);
  };
  return <div className="space-y-6 not-prose">
      {modelId && <div className="flex items-center justify-between border border-zinc-200 dark:border-zinc-800 rounded-xl px-5 py-3 bg-[#f7f5f2] dark:bg-zinc-900">
          <div className="flex items-center gap-3">
            <span className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 uppercase">Model ID</span>
            <button onClick={handleCopy} className="inline-flex items-center gap-1.5 group cursor-pointer bg-transparent border-0 p-0" title="Copy model ID">
              <code className="text-sm font-mono font-semibold text-zinc-900 dark:text-white">{modelId}</code>
              <span className="text-zinc-400 dark:text-zinc-500 group-hover:text-zinc-600 dark:group-hover:text-zinc-300 transition-colors">
                <Icon icon={copied ? "check" : "copy"} size={13} color="currentColor" />
              </span>
            </button>
          </div>
          <div className="flex items-center gap-6">
            {playgroundUrl && <a href={playgroundUrl} target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-1 text-sm text-zinc-700 dark:text-zinc-300 hover:text-zinc-900 dark:hover:text-white transition-colors">
                Try in Playground
                <Icon icon="arrow-right" size={14} color="currentColor" />
              </a>}
            {modelCardUrl && <a href={modelCardUrl} target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-1 text-sm text-zinc-700 dark:text-zinc-300 hover:text-zinc-900 dark:hover:text-white transition-colors">
                Model card
                <Icon icon="external-link" size={14} color="currentColor" />
              </a>}
          </div>
        </div>}

      <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
        <h3 className="text-base font-bold text-zinc-900 dark:text-white mb-6">Model Stats</h3>
        <div className="grid grid-cols-2 md:grid-cols-4 gap-6">
          <div>
            <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-3 uppercase">SPEED</div>
            <div className="text-3xl font-bold text-orange-500 dark:text-orange-400">{speed?.value}</div>
            <div className="text-sm text-zinc-500 dark:text-zinc-400 mt-1">{speed?.unit}</div>
          </div>

          <div>
            <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-3 uppercase">CONTEXT WINDOW</div>
            <div className="space-y-2">
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-8 shrink-0">Free</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">{contextLength.freeTier}</span>
              </div>
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-8 shrink-0">Paid</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">{contextLength.paidTiers}</span>
              </div>
            </div>
          </div>

          <div>
            <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-3 uppercase">MAX OUTPUT</div>
            <div className="space-y-2">
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-8 shrink-0">Free</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">{maxOutput.freeTier || 'N/A'}</span>
              </div>
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-8 shrink-0">Paid</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">{maxOutput.paidTiers || 'N/A'}</span>
              </div>
            </div>
          </div>

          <div>
            <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-3 uppercase">MODALITY</div>
            <div className="space-y-2">
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-10 shrink-0">Input</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">
                  {inputOutput.inputFormats ? inputOutput.inputFormats.map(f => f.charAt(0).toUpperCase() + f.slice(1)).join(', ') : 'Text'}
                </span>
              </div>
              <div className="flex items-baseline gap-3">
                <span className="text-xs text-zinc-500 dark:text-zinc-400 w-10 shrink-0">Output</span>
                <span className="text-sm font-semibold text-zinc-900 dark:text-white">
                  {inputOutput.outputFormats ? inputOutput.outputFormats.map(f => f.charAt(0).toUpperCase() + f.slice(1)).join(', ') : 'Text'}
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>

      {pricing.inputPrice && pricing.outputPrice && <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
          <div className="flex items-baseline gap-2 mb-6">
            <h3 className="text-base font-bold text-zinc-900 dark:text-white">Pricing</h3>
            <span className="text-sm text-zinc-500 dark:text-zinc-400">per million tokens</span>
          </div>
          <div className="flex">
            <div className="flex-1">
              <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-2 uppercase">Input</div>
              <div className="text-4xl font-bold text-zinc-900 dark:text-white">{pricing.inputPrice.split(' /')[0]}</div>
            </div>
            <div className="w-px bg-zinc-200 dark:bg-zinc-800 mx-8"></div>
            <div className="flex-1">
              <div className="text-xs font-mono font-semibold tracking-wider text-zinc-500 dark:text-zinc-400 mb-2 uppercase">Output</div>
              <div className="text-4xl font-bold text-zinc-900 dark:text-white">{pricing.outputPrice.split(' /')[0]}</div>
            </div>
          </div>
          <p className="text-sm text-zinc-500 dark:text-zinc-400 mt-10">
            Developer pricing. For volume discounts and enterprise features, see our{' '}
            <a href="https://www.cerebras.ai/pricing" className="text-black dark:text-white font-semibold underline decoration-orange-500 underline-offset-4 decoration-1 hover:decoration-2">
              pricing page
            </a>
            .
          </p>
        </div>}

      {knownLimitations && knownLimitations.length > 0 && <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
          <h3 className="text-base font-bold text-zinc-900 dark:text-white mb-4">Model Notes</h3>
          <div className="space-y-2">
            {knownLimitations.map((limitation, index) => <div key={index} className="flex items-start gap-2 py-2">
                <div className="w-1.5 h-1.5 bg-zinc-400 dark:bg-zinc-500 rounded-full flex-shrink-0 mt-2"></div>
                <div className="text-zinc-900 dark:text-white text-sm leading-relaxed prose-sm max-w-none">
                  {limitation}
                </div>
              </div>)}
          </div>
        </div>}

      {rateLimits && rateLimits.length > 0 && <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
          <h3 className="text-base font-bold text-zinc-900 dark:text-white mb-4">Rate Limits</h3>
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="border-b border-zinc-200 dark:border-zinc-800">
                  <th className="text-left py-3 px-2 text-zinc-500 dark:text-zinc-400 font-medium">Tier</th>
                  <th className="text-left py-3 px-2 text-zinc-500 dark:text-zinc-400 font-medium">Requests/min</th>
                  <th className="text-left py-3 px-2 text-zinc-500 dark:text-zinc-400 font-medium">Input Tokens/min</th>
                  <th className="text-left py-3 px-2 text-zinc-500 dark:text-zinc-400 font-medium">Daily Tokens</th>
                </tr>
              </thead>
              <tbody>
                {rateLimits.map((limit, index) => <tr key={index} className="border-b border-zinc-200/50 dark:border-zinc-800/50 last:border-b-0">
                    <td className="py-3 px-2 text-zinc-900 dark:text-white font-medium">{limit.tier}</td>
                    <td className="py-3 px-2 text-zinc-900 dark:text-white">{limit.requestsPerMin}</td>
                    <td className="py-3 px-2 text-zinc-900 dark:text-white">{limit.inputTokensPerMin}</td>
                    <td className="py-3 px-2 text-zinc-900 dark:text-white">{limit.dailyTokens}</td>
                  </tr>)}
              </tbody>
            </table>
          </div>
        </div>}

      {(endpoints && endpoints.length > 0 || features.length > 0) && <div className="grid grid-cols-2 gap-6">
          {endpoints && endpoints.length > 0 && <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
              <h3 className="text-base font-bold text-zinc-900 dark:text-white mb-4">Endpoints</h3>
              <ul className="space-y-2">
                {endpoints.map((endpoint, index) => {
    const endpointUrls = {
      'Chat Completions': '/v1/chat/completions',
      'Completions': '/v1/completions',
      'Models': '/v1/models'
    };
    const endpointName = typeof endpoint === 'string' ? endpoint : endpoint.name;
    const endpointUrl = typeof endpoint === 'object' && endpoint.url ? endpoint.url : endpointUrls[endpointName];
    return <li key={index} className="flex items-center gap-2 text-sm text-zinc-900 dark:text-white">
                      <div className="w-1.5 h-1.5 bg-zinc-400 dark:bg-zinc-500 rounded-full flex-shrink-0"></div>
                      <span className="font-medium">{endpointName}</span>
                      {endpointUrl && <code className="text-xs text-zinc-500 dark:text-zinc-400 font-mono">{endpointUrl}</code>}
                    </li>;
  })}
              </ul>
            </div>}
          {features.length > 0 && <div className="bg-white dark:bg-zinc-950 border border-zinc-200 dark:border-zinc-800 rounded-xl p-6">
              <h3 className="text-base font-bold text-zinc-900 dark:text-white mb-4">Capabilities</h3>
              <ul className="space-y-2">
                {features.map((feature, index) => <li key={index} className="flex items-center gap-2 text-sm text-zinc-900 dark:text-white">
                    <div className="w-1.5 h-1.5 bg-zinc-400 dark:bg-zinc-500 rounded-full flex-shrink-0"></div>
                    {feature}
                  </li>)}
              </ul>
            </div>}
        </div>}


      <div className="bg-gradient-to-r from-orange-600/10 to-red-500/10 border border-orange-600/20 rounded-xl p-6">
        <div className="flex items-center gap-3 mb-3">
          <div className="w-8 h-8 bg-orange-600/20 rounded-lg flex items-center justify-center">
            <Icon icon="rocket" size={18} color="#fb923c" />
          </div>
          <h3 className="text-lg font-semibold text-zinc-900 dark:text-white">Need Higher Limits?</h3>
        </div>
        <p className="text-zinc-700 dark:text-zinc-300">
          Reach out for custom pricing with our Enterprise tier for higher rate limits and dedicated support.
        </p>
        <div className="mt-4">
          <a href="https://cerebras.ai/contact-us" className="inline-flex items-center gap-2 bg-orange-600 hover:bg-orange-700 text-white px-4 py-2 rounded-lg transition-colors font-medium">
            Contact Sales
            <Icon icon="arrow-right" color="white" size={16} />
          </a>
        </div>
      </div>
    </div>;
};

<Callout icon="clock" color="#0eb540">
  This model is coming soon.
</Callout>

Model ID: `gemma-4-31b`. Speed: \~1500 tokens/sec. Context window: 65k tokens (free tier), 131k tokens (paid). Max output: 32k tokens (free tier), 40k tokens (paid). Pricing: coming soon. Modality: multimodal — accepts text and image inputs (base64 PNG or JPEG data URI only; external URLs not supported), produces text output. Max 5 images per request, 10 MB total image payload. Image inputs are only available in Chat Completions; the Completions endpoint does not support images. Capabilities: Image Inputs, Reasoning, Streaming, Sampling Controls, Structured Outputs, Tool Calling, Parallel Tool Calling, Prompt Caching. Reasoning is disabled by default; enable it with the `reasoning_effort` parameter. The `raw` and `hidden` reasoning formats are not supported. Structured outputs and tool calling with `strict: true` (constrained decoding) are supported. Rate limits (free tier): 5 requests/min, 30k input tokens/min, 1M tokens/day. Rate limits (Pay as You Go): 300 requests/min, 500k input tokens/min. Notes: Recommended starting parameters: temperature=1.0, top\_p=0.95.

<ModelInfo
  modelId="gemma-4-31b"
  modelCardUrl="https://huggingface.co/google/gemma-4-31B-it"
  playgroundUrl="https://cloud.cerebras.ai"
  contextLength={{
freeTier: "65k tokens",
paidTiers: "131k tokens"
}}
  maxOutput={{
freeTier: "32k tokens",
paidTiers: "40k tokens"
}}
  speed={{
value: "~1850",
unit: "tokens/sec"
}}
  rateLimits={[
{
  tier: "Free",
  requestsPerMin: "5",
  inputTokensPerMin: "30k",
  dailyTokens: "1M"
},
{
  tier: "Pay as You Go",
  requestsPerMin: "300",
  inputTokensPerMin: "500k",
  dailyTokens: "N/A"
}
]}
  endpoints={[
"Chat Completions",
"Completions"
]}
  features={[
"Image Inputs",
"Reasoning",
"Streaming",
"Sampling Controls",
"Structured Outputs",
"Tool Calling",
"Parallel Tool Calling",
"Prompt Caching"
]}
  inputOutput={{
inputFormats: ["text", "image"],
outputFormats: ["text"]
}}
  knownLimitations={[
 <span>
  Image inputs are only available in Chat Completions. The Completions endpoint does not support image inputs.
</span>,
<span>
  See the <a href="/capabilities/image-inputs" className="font-semibold text-zinc-900 dark:text-white underline underline-offset-4 decoration-2 hover:text-orange-500 hover:decoration-4" style={{ textDecorationColor: '#f97316' }}>Image Inputs guide</a> for limits and token behavior.
</span>,
<span>
Structured outputs and tool calling with <code>strict: true</code> (constrained decoding) is supported for this model.
</span>,
<span>
  Reasoning is disabled by default for Gemma 4. Use <code>reasoning_effort</code> to control it in Chat Completions. Gemma 4 does not support <code>raw</code> or <code>hidden</code> reasoning formats today. See the <a href="/capabilities/reasoning#gemma-4:-reasoning_effort" className="font-semibold text-zinc-900 dark:text-white underline underline-offset-4 decoration-2 hover:text-orange-500 hover:decoration-4" style={{ textDecorationColor: '#f97316' }}>reasoning guide</a> for details.
</span>
]}
/>
