Пакетная обработка

Обрабатывайте крупные AI-нагрузки эффективно на GPU Clore.ai

Обрабатывайте большие объемы задач эффективно на GPU CLORE.AI.

Найдите подходящий GPU на CLORE.AI Marketplace.

Когда использовать пакетную обработку

Обработка сотен/тысяч элементов
Преобразование больших наборов данных
Генерация большого количества изображений/видео
Массовая транскрипция
Подготовка данных для обучения

Пакетная обработка LLM

vLLM Batch API

vLLM автоматически обрабатывает батчи с помощью непрерывной пакетной обработки:

from openai import OpenAI
import asyncio
import aiohttp

client = OpenAI(base_url="http://server:8000/v1", api_key="dummy")

# Синхронный батч
def process_batch_sync(prompts):
    """Send each prompt to the chat-completions endpoint, one request at a time.

    Requests go through the module-level ``client`` sequentially. The returned
    list holds the reply text for every prompt, in input order.
    """
    def _complete(text):
        reply = client.chat.completions.create(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=[{"role": "user", "content": text}]
        )
        return reply.choices[0].message.content

    return [_complete(text) for text in prompts]

# Обработать 100 подсказок
prompts = [f"Summarize topic {i}" for i in range(100)]
results = process_batch_sync(prompts)

Асинхронная пакетная обработка (быстрее)

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="http://server:8000/v1", api_key="dummy")

async def process_single(prompt):
    """Request one chat completion for ``prompt`` and return the reply text."""
    user_message = {"role": "user", "content": prompt}
    completion = await client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        messages=[user_message]
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

async def process_batch_async(prompts, max_concurrent=10):
    """Process all prompts concurrently, capped at ``max_concurrent`` in flight.

    Every prompt is handed to ``process_single``; a semaphore limits how many
    of those calls run at once. Results come back in the order of ``prompts``.
    """
    gate = asyncio.Semaphore(max_concurrent)

    async def _bounded(text):
        async with gate:
            reply = await process_single(text)
        return reply

    return await asyncio.gather(*map(_bounded, prompts))

# Обработать 1000 подсказок с 10 параллельными запросами
prompts = [f"Generate description for product {i}" for i in range(1000)]
results = asyncio.run(process_batch_async(prompts, max_concurrent=10))

Батч с отслеживанием прогресса

import asyncio
from tqdm.asyncio import tqdm
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="http://server:8000/v1", api_key="dummy")

async def process_with_progress(prompts, max_concurrent=10):
    """Run chat completions concurrently while showing a tqdm progress bar.

    At most ``max_concurrent`` requests are in flight at a time. Replies are
    returned in the same order as ``prompts``, regardless of the order in
    which the requests finish.
    """
    limiter = asyncio.Semaphore(max_concurrent)

    async def fetch(position, text):
        async with limiter:
            completion = await client.chat.completions.create(
                model="meta-llama/Llama-3.1-8B-Instruct",
                messages=[{"role": "user", "content": text}]
            )
            return position, completion.choices[0].message.content

    pending = [fetch(position, text) for position, text in enumerate(prompts)]

    # Each coroutine reports its own index, so a finished reply can be
    # slotted straight into its original position.
    ordered = [None] * len(pending)
    for finished in tqdm.as_completed(pending, total=len(pending)):
        position, text = await finished
        ordered[position] = text

    return ordered

# Запустить
prompts = ["..." for _ in range(500)]
results = asyncio.run(process_with_progress(prompts))

Сохранение прогресса для долгих батчей

import json
from pathlib import Path

def process_batch_with_checkpoint(prompts, checkpoint_file="checkpoint.json"):
    """Process prompts sequentially, persisting progress so a run can resume.

    Progress is stored as JSON in ``checkpoint_file`` (every 10th index and
    on any error). If that file exists when the function starts, processing
    resumes right after the last completed index. The file is deleted once
    every prompt has been processed.

    Args:
        prompts: Sequence of prompt strings.
        checkpoint_file: Path of the JSON checkpoint file.

    Returns:
        List with one reply text per prompt, in input order.

    Raises:
        ValueError: If an existing checkpoint was written for a different
            number of prompts than the one passed in.
        Exception: Any error raised by the completion request is re-raised
            after the checkpoint has been saved.
    """
    checkpoint = Path(checkpoint_file)

    def save_checkpoint(last_completed):
        # Write to a sibling temp file and rename it over the real one, so an
        # interruption mid-write cannot leave a truncated, unloadable checkpoint.
        tmp = checkpoint.with_name(checkpoint.name + ".tmp")
        with open(tmp, 'w', encoding="utf-8") as f:
            json.dump({'results': results, 'last_completed': last_completed}, f)
        tmp.replace(checkpoint)

    # Load the checkpoint, if any
    if checkpoint.exists():
        with open(checkpoint, encoding="utf-8") as f:
            data = json.load(f)
        results = data['results']
        start_idx = data['last_completed'] + 1
        # A checkpoint from a different batch would silently misalign results.
        if len(results) != len(prompts):
            raise ValueError(
                f"Checkpoint {checkpoint_file} holds {len(results)} results "
                f"but {len(prompts)} prompts were given"
            )
        print(f"Resuming from index {start_idx}")
    else:
        results = [None] * len(prompts)
        start_idx = 0

    # Process the remaining prompts
    for i in range(start_idx, len(prompts)):
        try:
            response = client.chat.completions.create(
                model="meta-llama/Llama-3.1-8B-Instruct",
                messages=[{"role": "user", "content": prompts[i]}]
            )
            results[i] = response.choices[0].message.content

            # Save a checkpoint every 10 items
            if i % 10 == 0:
                save_checkpoint(i)
                print(f"Checkpoint saved at {i}")

        except Exception as e:
            print(f"Error at {i}: {e}")
            # Persist everything completed before the failing item
            save_checkpoint(i - 1)
            raise

    # Remove the checkpoint once the whole batch is done
    if checkpoint.exists():
        checkpoint.unlink()

    return results

Генерация изображений пакетно

SD WebUI пакетная обработка

import requests
import base64
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

SD_API = "http://server:7860"

def generate_image(prompt, output_path):
    """Render one image through the SD WebUI txt2img endpoint and save it.

    Args:
        prompt: Text prompt sent to the API.
        output_path: File path the decoded image bytes are written to.

    Returns:
        ``output_path``, unchanged.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # NOTE(review): the negative prompt is not in English; confirm the
    # target model's text encoder handles it as intended.
    response = requests.post(f'{SD_API}/sdapi/v1/txt2img', json={
        'prompt': prompt,
        'negative_prompt': 'размыто, низкое качество',
        'steps': 20,
        'width': 512,
        'height': 512
    })
    # Fail loudly on an HTTP error instead of hitting an opaque KeyError or
    # base64 decode error on the error payload below.
    response.raise_for_status()

    # The first entry of 'images' is decoded from base64 to raw image bytes
    image_data = base64.b64decode(response.json()['images'][0])

    with open(output_path, 'wb') as f:
        f.write(image_data)

    return output_path

def batch_generate(prompts, output_dir, max_workers=4):
    """Generate one image per prompt using a thread pool, with a progress bar.

    Args:
        prompts: Sequence of text prompts.
        output_dir: Directory for the images; created (including missing
            parent directories) if it does not exist.
        max_workers: Number of threads issuing requests concurrently.

    Returns:
        List of output file paths, in prompt order
        (``<output_dir>/image_0000.png``, ...).
    """
    # parents=True so a nested path such as "runs/today/out" does not fail
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    tasks = [
        (prompt, f"{output_dir}/image_{i:04d}.png")
        for i, prompt in enumerate(prompts)
    ]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in submission order, so the returned
        # paths line up with the prompts
        results = list(tqdm(
            executor.map(lambda x: generate_image(*x), tasks),
            total=len(tasks)
        ))

    return results

# Сгенерировать 100 изображений
prompts = [f"A beautiful landscape, style {i}" for i in range(100)]
batch_generate(prompts, "./outputs", max_workers=4)

ComfyUI пакет с очередью

import json
import urllib.request
import time
from pathlib import Path

SERVER = "server:8188"

def queue_prompt(workflow):
    """POST a workflow to the server's ``/prompt`` endpoint.

    Args:
        workflow: JSON-serializable workflow, sent as ``{"prompt": workflow}``.

    Returns:
        The decoded JSON response from the server.
    """
    data = json.dumps({"prompt": workflow}).encode('utf-8')
    req = urllib.request.Request(f"http://{SERVER}/prompt", data=data)
    # Close the HTTP response deterministically rather than leaving the
    # connection to the garbage collector.
    with urllib.request.urlopen(req) as response:
        return json.loads(response.read())

def get_history(prompt_id):
    """Fetch and decode the ``/history/<prompt_id>`` JSON document from the server."""
    url = f"http://{SERVER}/history/{prompt_id}"
    with urllib.request.urlopen(url) as reply:
        payload = reply.read()
    return json.loads(payload)

def batch_generate_comfyui(prompts, base_workflow_path, output_dir):
    """Queue one workflow per prompt and block until all have finished.

    Args:
        prompts: Sequence of text prompts.
        base_workflow_path: Path to a workflow JSON file used as the template.
            Node "6" receives the prompt text and node "9" the filename
            prefix; adjust these IDs to match your workflow.
        output_dir: Directory created (with missing parents) before queuing.

    Returns:
        None. Progress is reported via ``print``.
    """
    import copy  # function-scope import: only this block needs it

    # parents=True so nested output paths do not fail
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Load the base workflow
    with open(base_workflow_path, encoding="utf-8") as f:
        base_workflow = json.load(f)

    prompt_ids = []

    # Queue every prompt
    for i, prompt in enumerate(prompts):
        # Deep copy: dict.copy() is shallow, so editing the nested "inputs"
        # dicts would otherwise modify the shared template as well.
        workflow = copy.deepcopy(base_workflow)
        # Set the prompt node (adjust the node ID if needed)
        workflow["6"]["inputs"]["text"] = prompt
        # Set the output filename prefix
        workflow["9"]["inputs"]["filename_prefix"] = f"batch_{i:04d}"

        result = queue_prompt(workflow)
        prompt_ids.append(result['prompt_id'])
        print(f"Queued {i+1}/{len(prompts)}")

    # Wait for completion: a prompt counts as done once its ID shows up in
    # the history response
    print("Waiting for generation...")
    completed = set()
    while len(completed) < len(prompt_ids):
        for pid in prompt_ids:
            if pid not in completed:
                history = get_history(pid)
                if pid in history:
                    completed.add(pid)
                    print(f"Completed {len(completed)}/{len(prompt_ids)}")
        time.sleep(1)

    print("All done!")

FLUX пакетная обработка

import torch
from diffusers import FluxPipeline
from pathlib import Path
from tqdm import tqdm

# Загрузить модель один раз
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16
)
pipe.to("cuda")

def batch_generate_flux(prompts, output_dir, batch_size=4):
    """Generate images for ``prompts`` in chunks of ``batch_size`` and save them.

    Uses the module-level ``pipe``. Files are written as
    ``<output_dir>/image_NNNN.png``, numbered by the prompt's position.
    """
    Path(output_dir).mkdir(exist_ok=True)

    for start in tqdm(range(0, len(prompts), batch_size)):
        chunk = prompts[start:start + batch_size]

        # One pipeline call per chunk
        output = pipe(
            chunk,
            height=1024,
            width=1024,
            num_inference_steps=4,
            guidance_scale=0.0
        )

        # Number each file by its global position in the prompt list
        for index, picture in enumerate(output.images, start=start):
            picture.save(f"{output_dir}/image_{index:04d}.png")

# Generate 100 images in batches of 4.
# Cycle through the animals by index so exactly 100 prompts are built
# (repeating the 3-item list 34 times would produce 102).
animals = ["cat", "dog", "fox"]
prompts = [f"A {animals[i % len(animals)]} in a forest" for i in range(100)]
batch_generate_flux(prompts, "./flux_outputs", batch_size=4)

Пакетная обработка аудио

Whisper пакетная транскрипция

import whisper
from pathlib import Path
from tqdm import tqdm
import json

model = whisper.load_model("large-v3")

def batch_transcribe(audio_files, output_dir):
    """Transcribe each audio file and write the transcripts as JSON.

    For every file a ``<stem>.json`` is written to ``output_dir``; a combined
    ``all_transcripts.json`` is written at the end. A file that fails is
    reported via ``print`` and recorded as ``{'error': ...}`` so the rest of
    the batch still runs.

    Args:
        audio_files: Iterable of ``pathlib.Path`` objects (``.name`` and
            ``.stem`` are used).
        output_dir: Directory for the JSON files; created with missing
            parents if needed.

    Returns:
        Dict mapping each file name to its transcript dict (``text``,
        ``language``, ``segments``) or to ``{'error': message}``.
    """
    # parents=True so nested output paths do not fail
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    results = {}

    for audio_path in tqdm(audio_files):
        try:
            result = model.transcribe(str(audio_path))

            results[audio_path.name] = {
                'text': result['text'],
                'language': result['language'],
                'segments': result['segments']
            }

            # Save the individual transcript. UTF-8 with ensure_ascii=False
            # keeps non-English text readable instead of \uXXXX escapes and
            # avoids depending on the platform's default encoding.
            output_file = Path(output_dir) / f"{audio_path.stem}.json"
            with open(output_file, 'w', encoding="utf-8") as f:
                json.dump(results[audio_path.name], f, indent=2, ensure_ascii=False)

        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the batch
            print(f"Error processing {audio_path}: {e}")
            results[audio_path.name] = {'error': str(e)}

    # Save the combined results
    with open(f"{output_dir}/all_transcripts.json", 'w', encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    return results

# Транскрибировать все аудиофайлы в директории
audio_files = list(Path("./audio").glob("*.mp3"))
results = batch_transcribe(audio_files, "./transcripts")

Параллельный Whisper (несколько GPU)

import whisper
from concurrent.futures import ProcessPoolExecutor
import torch

# Per-process cache of loaded models, keyed by GPU id, so a pool worker loads
# the model once instead of once per audio file.
_WHISPER_MODELS = {}


def transcribe_on_gpu(args):
    """Transcribe one audio file on the given GPU.

    Args:
        args: ``(audio_path, gpu_id)`` tuple (a single argument so the
            function can be used with ``executor.map``).

    Returns:
        ``(audio_path, text)`` with ``audio_path`` exactly as passed in.
    """
    audio_path, gpu_id = args
    torch.cuda.set_device(gpu_id)

    model = _WHISPER_MODELS.get(gpu_id)
    if model is None:
        model = whisper.load_model("large-v3", device=f"cuda:{gpu_id}")
        _WHISPER_MODELS[gpu_id] = model

    # str() so pathlib.Path inputs work, as in the single-GPU example
    result = model.transcribe(str(audio_path))
    return audio_path, result['text']

def parallel_transcribe(audio_files, num_gpus=2):
    """Transcribe files in a process pool with ``num_gpus`` workers.

    Each file is paired with a GPU index chosen round-robin by its position
    in ``audio_files``; the pairs are mapped over ``transcribe_on_gpu``.
    Returns a dict mapping each audio path to its transcribed text.
    """
    jobs = []
    for position, path in enumerate(audio_files):
        jobs.append((path, position % num_gpus))

    with ProcessPoolExecutor(max_workers=num_gpus) as pool:
        pairs = list(pool.map(transcribe_on_gpu, jobs))

    return dict(pairs)

Пакетная обработка видео

Пакетная генерация видео (SVD)

from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from pathlib import Path
from tqdm import tqdm
import torch

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16"
)
pipe.to("cuda")

def batch_generate_videos(image_paths, output_dir):
    """Turn each input image into an ``.mp4`` clip saved under ``output_dir``.

    Every image is resized to 1024x576, passed through the module-level
    ``pipe`` for 25 frames and exported at 7 fps as ``<stem>.mp4``. A failure
    on one image is printed and the loop moves on to the next.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(exist_ok=True)

    for source in tqdm(image_paths):
        try:
            still = load_image(str(source)).resize((1024, 576))
            clip = pipe(still, num_frames=25, decode_chunk_size=8).frames[0]
            destination = target_dir / f"{source.stem}.mp4"
            export_to_video(clip, str(destination), fps=7)
        except Exception as e:
            print(f"Error with {source}: {e}")

# Обработать все изображения
images = list(Path("./input_images").glob("*.png"))
batch_generate_videos(images, "./output_videos")

Шаблоны конвейера данных

Шаблон Производитель-Потребитель

import asyncio
from asyncio import Queue

async def producer(queue, items, num_workers=None):
    """Put every item on the queue, then one ``None`` sentinel per consumer.

    Args:
        queue: ``asyncio.Queue`` shared with the consumers.
        items: Iterable of work items.
        num_workers: Number of consumers to signal. When omitted, the
            module-level ``NUM_WORKERS`` is used (the original behavior).
    """
    if num_workers is None:
        num_workers = NUM_WORKERS

    for item in items:
        await queue.put(item)
    # Signal completion: each consumer stops on its own sentinel, so the
    # count must equal the number of consumers actually running.
    for _ in range(num_workers):
        await queue.put(None)


async def consumer(queue, results, worker_id):
    """Process items from the queue until a ``None`` sentinel arrives.

    Successful results are appended to ``results``; a failing item is
    reported via ``print`` and skipped so the worker keeps running.
    """
    while True:
        item = await queue.get()
        if item is None:
            # Account for the sentinel too, so queue.join() would not hang
            queue.task_done()
            break

        try:
            result = await process_item(item)
            results.append(result)
        except Exception as e:
            print(f"Worker {worker_id} error: {e}")
        finally:
            queue.task_done()


async def run_pipeline(items, num_workers=5):
    """Run a producer/consumer pipeline over ``items``.

    Args:
        items: Iterable of work items.
        num_workers: Number of concurrent consumer tasks.

    Returns:
        List of results in completion order (not input order).
    """
    queue = Queue(maxsize=100)
    results = []

    # Start the workers
    workers = [
        asyncio.create_task(consumer(queue, results, i))
        for i in range(num_workers)
    ]

    # Run the producer with the same worker count, so the number of
    # sentinels always matches the number of consumers started above
    await producer(queue, items, num_workers)

    # Wait for every worker to see its sentinel
    await asyncio.gather(*workers)

    return results

NUM_WORKERS = 5
items = list(range(1000))
results = asyncio.run(run_pipeline(items))

Шаблон Map-Reduce

from concurrent.futures import ProcessPoolExecutor
from functools import reduce

def map_function(item):
    """Map step: apply ``process`` to a single item and return its result."""
    # Replace with your own processing logic
    mapped_value = process(item)
    return mapped_value

def reduce_function(results):
    """Reduce step: merge all mapped results via ``combine``."""
    combined = combine(results)
    return combined

def map_reduce(items, num_workers=4):
    """Apply ``map_function`` to every item in a process pool, then reduce.

    The mapped values are collected into a list (in input order) and passed
    to ``reduce_function``, whose return value is the result.
    """
    # Map phase: fan the items out across worker processes
    with ProcessPoolExecutor(max_workers=num_workers) as pool:
        mapped_values = list(pool.map(map_function, items))

    # Reduce phase: fold the mapped values into the final answer
    return reduce_function(mapped_values)

Советы по оптимизации

1. Правильная настройка параллелизма

# LLM: Соответствовать максимальному размеру батча vLLM
max_concurrent = 10  # значение по умолчанию vLLM

# Генерация изображений: 1-4 в зависимости от объема VRAM
max_concurrent = 2  # SD WebUI
max_concurrent = 4  # FLUX на RTX 4090

# Транскрипция: 1 на GPU
max_concurrent = num_gpus

2. Настройка размера батча

# Слишком маленький: недоиспользование GPU
# Слишком большой: ошибки OOM

# Размеры батчей для генерации изображений:
# RTX 3060: batch_size = 1
# RTX 3090: batch_size = 2-4
# RTX 4090: batch_size = 4-8
# A100: batch_size = 8-16

3. Управление памятью

import gc
import torch

def clear_memory() -> None:
    """Run the Python garbage collector, then empty PyTorch's CUDA cache."""
    # Collect first so tensors that are only unreachable garbage are dropped
    # before the cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

# Вызывать между большими батчами
for batch in batches:
    process_batch(batch)
    clear_memory()

4. Сохраняйте промежуточные результаты

# Всегда делайте чекпоинты для долгих задач
CHECKPOINT_INTERVAL = 100

for i, item in enumerate(items):
    results.append(process(item))

    if i % CHECKPOINT_INTERVAL == 0:
        save_checkpoint(results, i)

Оптимизация затрат

Оцените перед запуском

def estimate_cost(num_items, time_per_item_sec, hourly_rate):
    """Estimate the duration and price of a batch job.

    Args:
        num_items: Number of items to process.
        time_per_item_sec: Processing time per item, in seconds.
        hourly_rate: Rental price per hour.

    Returns:
        ``(total_hours, total_cost)`` tuple.
    """
    total_seconds = num_items * time_per_item_sec
    hours_needed = total_seconds / 3600
    return hours_needed, hours_needed * hourly_rate

# Пример: 10 000 изображений по 3 с каждое на RTX 4090
hours, cost = estimate_cost(10000, 3, 0.10)
print(f"Estimated: {hours:.1f} hours, ${cost:.2f}")
# Вывод: Estimated: 8.3 hours, $0.83

Используйте прерываемые (spot) инстансы

На 30–50% дешевле
Хорошо подходят для пакетных задач, которые допускают прерывание
Часто сохраняйте чекпоинты

Обработка в часы низкого спроса

Ставьте задания в очередь в периоды низкого спроса
Доступность GPU обычно выше
Возможны более низкие цены на spot-инстансы

Дальнейшие шаги

Интеграция API - Создавайте свои API
Настройка Multi-GPU - Масштабируйтесь
Калькулятор затрат - Оцените затраты

Предыдущая: Интеграция API | Следующая: Обзор

Последнее обновление 1 день назад

Это было полезно?

hashtagКогда использовать пакетную обработку

hashtagПакетная обработка LLM

hashtagvLLM Batch API

hashtagАсинхронная пакетная обработка (быстрее)

hashtagБатч с отслеживанием прогресса

hashtagСохранение прогресса для долгих батчей

hashtagГенерация изображений пакетно

hashtagSD WebUI пакетная обработка

hashtagComfyUI пакет с очередью

hashtagFLUX пакетная обработка

hashtagПакетная обработка аудио

hashtagWhisper пакетная транскрипция

hashtagПараллельный Whisper (несколько GPU)

hashtagПакетная обработка видео

hashtagПакетная генерация видео (SVD)

hashtagШаблоны конвейера данных

hashtagШаблон Производитель-Потребитель

hashtagШаблон Map-Reduce

hashtagСоветы по оптимизации

hashtag1. Правильная настройка параллелизма

hashtag2. Настройка размера батча

hashtag3. Управление памятью

hashtag4. Сохраняйте промежуточные результаты

hashtagОптимизация затрат

hashtagОцените перед запуском

hashtagИспользуйте прерываемые (spot) инстансы

hashtagОбработка в часы низкого спроса

hashtagДальнейшие шаги

Когда использовать пакетную обработку

Пакетная обработка LLM

vLLM Batch API

Асинхронная пакетная обработка (быстрее)

Батч с отслеживанием прогресса

Сохранение прогресса для долгих батчей

Генерация изображений пакетно

SD WebUI пакетная обработка

ComfyUI пакет с очередью

FLUX пакетная обработка

Пакетная обработка аудио

Whisper пакетная транскрипция

Параллельный Whisper (несколько GPU)

Пакетная обработка видео

Пакетная генерация видео (SVD)

Шаблоны конвейера данных

Шаблон Производитель-Потребитель

Шаблон Map-Reduce

Советы по оптимизации

1. Правильная настройка параллелизма

2. Настройка размера батча

3. Управление памятью

4. Сохраняйте промежуточные результаты

Оптимизация затрат

Оцените перед запуском

Используйте прерываемые (spot) инстансы

Обработка в часы низкого спроса

Дальнейшие шаги