结合wordpress rest api使用python将所有文章中的图片重新生成

这个python脚本实现的功能:
1. 将文章中所有外链图片下载并上传到媒体库中;
2. 生成必要的缩略图;
3. 将文章中的第一张图片设置为该文章的特色图片;

适合文章图片使用了第三方图库或者其他存储,现在想放在媒体库中存储。

使用

在【用户】=> 【个人资料】的底部,添加一个应用程序密码。按要求配置脚本中的 WP_SITE_URL、WP_USERNAME、WP_APP_PASSWORD。

友情提示:使用脚本前,请一定备份数据库。

# -*- coding: utf-8 -*-
import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO

# Configuration
# Site base URL without a trailing slash (the script appends "/wp-json/..."),
# e.g. https://www.some-domain.com
WP_SITE_URL = ""
# Username of an account that is allowed to edit every post
WP_USERNAME = ""
# Application password generated for that account
WP_APP_PASSWORD = ""

# HTTP Basic credentials sent with every WordPress REST API request below
auth = HTTPBasicAuth(WP_USERNAME, WP_APP_PASSWORD)

def get_image_urls_from_post(content):
    """Extract the external image URLs referenced by a post's HTML.

    Args:
        content: HTML string of the post body.

    Returns:
        A list of unique ``src`` values, in document order, for every
        ``<img>`` whose URL neither contains ``qpic.cn`` nor starts with
        ``WP_SITE_URL``.
    """
    soup = BeautifulSoup(content, "html.parser")
    urls = []
    seen = set()
    for tag in soup.find_all('img'):
        # An <img> without a src attribute would make tag['src'] raise
        # KeyError and abort the whole run, so look it up defensively.
        src = tag.get('src')
        if not src or src in seen:
            # Skip repeats too: the same URL only needs one download/upload.
            continue
        seen.add(src)
        if 'qpic.cn' in src:
            continue
        if not src.startswith(WP_SITE_URL):
            urls.append(src)
    return urls

def download_image(url):
    """Download an image and open it with Pillow.

    Args:
        url: URL of the image to fetch.

    Returns:
        The opened ``PIL.Image.Image`` on success, or ``None`` when the
        request fails, the server answers with an error status, or the
        payload cannot be opened as an image.
    """
    try:
        # Without a timeout requests can wait forever on a stalled server.
        response = requests.get(url, timeout=30)
        # Treat 4xx/5xx as a failed download instead of handing an error
        # page to the image decoder.
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:
        # Deliberately best-effort: one bad image must not stop the batch,
        # but report why it failed so the problem can be diagnosed.
        print(f"图片下载失败: {url} ({e})")
        return None

def upload_image_to_wordpress(image, filename):
    """Upload an image to the WordPress media library.

    The image is re-encoded in its own format into memory and posted to the
    ``/wp-json/wp/v2/media`` endpoint.

    Args:
        image: An opened Pillow image whose ``format`` attribute is set.
        filename: File name to register for the new attachment.

    Returns:
        The decoded JSON body of the media endpoint's response.

    Raises:
        ValueError: If ``image.format`` is empty, so neither the encoder nor
            the MIME type can be chosen.
        requests.exceptions.RequestException: If the request fails or the
            server responds with an error status.
    """
    if not image.format:
        # Fail with a clear message instead of an opaque error from
        # image.save() / .lower() further down.
        raise ValueError(f"无法确定图片格式,无法上传: {filename}")

    buffer = BytesIO()
    image.save(buffer, format=image.format)
    buffer.seek(0)

    # Send the name as a quoted-string so names containing spaces stay valid;
    # embedded double quotes are dropped because they would end the value.
    safe_name = filename.replace('"', '')
    headers = {
        'Content-Disposition': f'attachment; filename="{safe_name}"',
        'Content-Type': f'image/{image.format.lower()}'
    }
    response = requests.post(
        f"{WP_SITE_URL}/wp-json/wp/v2/media",
        headers=headers,
        data=buffer,
        auth=auth,
        # (connect, read) seconds; the read limit is generous because the
        # server may need a while before it answers an upload.
        timeout=(10, 120)
    )

    response.raise_for_status()
    return response.json()

def update_post_with_new_images(post_id, content, image_map, featured_media_id=None):
    """Rewrite image URLs in a post and optionally set its featured image.

    Args:
        post_id: ID of the post to update.
        content: Current post content in which the URLs are replaced.
        image_map: Mapping of old image URL -> new image URL.
        featured_media_id: Media ID to set as the featured image; the
            field is left out of the request when this is falsy.

    Returns:
        The decoded JSON body of the posts endpoint's response.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server responds with an error status.
    """
    # Replace the longest URLs first: if one old URL is a prefix of another,
    # substituting the shorter one first would corrupt the longer one.
    for old_url in sorted(image_map, key=len, reverse=True):
        content = content.replace(old_url, image_map[old_url])

    data = {'content': content}
    if featured_media_id:
        data['featured_media'] = featured_media_id

    response = requests.post(
        f"{WP_SITE_URL}/wp-json/wp/v2/posts/{post_id}",
        json=data,
        auth=auth,
        # Avoid hanging forever if the server stops responding.
        timeout=60
    )

    response.raise_for_status()
    return response.json()

def get_all_posts(per_page=10):
    """Yield every post from the WordPress REST API, page by page.

    Args:
        per_page: Number of posts requested per page.

    Yields:
        One post dict per post, as returned by ``/wp-json/wp/v2/posts``.
        Posts are requested with ``context=edit`` so that each one carries
        the stored markup (``content.raw``) next to ``content.rendered``.

    Iteration stops after the last page, or early (with a printed message)
    when the server answers with a non-200 status.
    """
    page = 1
    while True:
        response = requests.get(
            f"{WP_SITE_URL}/wp-json/wp/v2/posts",
            auth=auth,
            params={'page': page, 'per_page': per_page, 'context': 'edit'},
            # Avoid hanging forever if the server stops responding.
            timeout=30
        )

        if response.status_code != 200:
            print(f"获取文章时出错: {response.status_code} {response.text}")
            break

        current_page_posts = response.json()
        if not current_page_posts:
            break
        yield from current_page_posts

        # WordPress rejects a page number past the last page with HTTP 400
        # rather than returning an empty list, so use the advertised page
        # count to stop cleanly instead of ending on an error message.
        try:
            total_pages = int(response.headers.get('X-WP-TotalPages', ''))
        except ValueError:
            total_pages = None  # header missing/invalid: keep paging as before
        if total_pages is not None and page >= total_pages:
            break
        page += 1

# Walk through every post, move its external images into the media library,
# then rewrite the post to point at the uploaded copies.
for post in get_all_posts():
    post_id = post['id']
    # Prefer the stored markup when the API response includes it:
    # content.rendered is display HTML, and writing that back would replace
    # the post's original source with its filtered output.
    post_content = post['content'].get('raw') or post['content']['rendered']
    print(f"正在处理文章 {post_id}")
    image_urls = get_image_urls_from_post(post_content)
    print(f"提取的图片URL: {image_urls}")
    if not image_urls:
        print("此文章没有图片,跳过")
        continue  # nothing to migrate for this post
    image_map = {}
    featured_media_id = None
    for url in image_urls:
        img = download_image(url)
        print(f"下载的图片类型: {type(img)}")
        if img is None:
            continue
        # Build the file name from the URL path only; a query string or
        # fragment left in the name would corrupt the file extension.
        url_path = url.split('#', 1)[0].split('?', 1)[0]
        filename = url_path.rstrip('/').split('/')[-1]
        if '.' not in filename and img.format:
            # No extension in the URL: derive one from the decoded format.
            filename = f"{filename or 'image'}.{img.format.lower()}"
        print(f"图片文件名: {filename}")
        try:
            upload_response = upload_image_to_wordpress(img, filename)
        except (requests.exceptions.RequestException, OSError, ValueError) as e:
            # One rejected image should not abort the whole migration; its
            # URL simply stays unchanged in the post.
            print(f"图片上传失败: {url} ({e})")
            continue
        image_map[url] = upload_response['source_url']
        # The first successfully uploaded image becomes the featured image.
        if featured_media_id is None:
            featured_media_id = upload_response['id']
    if image_map:
        try:
            update_post_with_new_images(post_id, post_content, image_map, featured_media_id)
        except requests.exceptions.RequestException as e:
            # Keep going with the remaining posts if this update is refused.
            print(f"文章更新失败: {post_id} ({e})")
结合wordpress rest api使用python将所有文章中的图片重新生成

原文链接:https://beltxman.com/4263.html,若无特殊说明本站内容为 行星带 原创,未经同意禁止转载。

发表评论

您的电子邮箱地址不会被公开。

Scroll to top