///usr/bin/env jbang "$0" "$@" ; exit $? // // This is a JBang script. You need JBang to run this script. // // To learn more go to https://www.jbang.dev and install JBang. // // Run this script: // ./ghost2md.java -f -d // //DEPS info.picocli:picocli:4.7.7 //DEPS com.jayway.jsonpath:json-path:2.8.0 //DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0 //DEPS io.github.furstenheim:copy_down:1.1 //JAVA 21+ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.ParseContext; import com.jayway.jsonpath.TypeRef; import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; import io.github.furstenheim.CopyDown; import picocli.CommandLine; import picocli.CommandLine.Command; import picocli.CommandLine.Option; import static java.util.Optional.ofNullable; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.net.URI; import java.nio.file.Paths; import java.util.List; import java.util.Optional; import java.util.concurrent.Callable; import java.util.regex.Pattern; @Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files") public class ghost2md implements Callable { private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build()); private static final CopyDown HTML_TO_MD = new CopyDown(); @Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file") private File exportFile; @Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory") private File destinationDir; @Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)") private String type; @Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)") private String domain; @Override public Integer call() throws Exception { if (!exportFile.exists()) { System.err.printf("%s does not exist%n", exportFile.getAbsolutePath()); return 404; } destinationDir.mkdirs(); var json = JSON.parse(exportFile); var posts = json.read("$.db[*].data.posts[*]", new TypeRef>(){}); var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef>(){}); var tags = json.read("$.db[*].data.tags[*]", new TypeRef>(){}); var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef>(){}); var authors = json.read("$.db[*].data.users[*]", new TypeRef>(){}); var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef>(){}); for (var post : posts) { if (!"all".equals(type) && !post.type().equals(type)) { continue; } var filename = post.slug() + ".md"; var markdown = HTML_TO_MD.convert(post.html()); var file = new File(destinationDir, filename); var title = "# " + post.title(); var meta = postMeta.stream() .filter(link -> link.post_id().equals(post.id())) .findFirst(); var linkedTags = tagLinks.stream() .filter(link -> link.post_id().equals(post.id())) .flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id()))) .toList(); var linkedAuthors = authorLinks.stream() .filter(link -> link.post_id().equals(post.id())) .flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id()))) .toList(); System.out.printf("Writing %s\n", file); try (var writer = new PrintWriter(new FileWriter(file), true)) { writer.println("---"); writer.println("date:"); ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date)); ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date)); ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date)); if (post.custom_excerpt() != null) { writer.println("excerpt: \"" + post.custom_excerpt() + "\""); } else if(meta.isPresent() && meta.get().title().isPresent()) { writer.println("excerpt: \"" + meta.get().title().get() + "\""); } else { writer.println("excerpt: \"" + post.title() + "\""); } if (post.featured() > 0) { writer.println("pin: true"); } if (post.feature_image() != null) { var url = post.feature_image(); if (url.startsWith("__GHOST_URL__")) { url = downloadAsset(url.replace("__GHOST_URL__", domain)); } writer.println("cover: " + url); } else if (meta.isPresent() && meta.get().image().isPresent()) { var url = meta.get().image().get(); if (url.startsWith("__GHOST_URL__")) { url = downloadAsset(url.replace("__GHOST_URL__", domain)); } writer.println("cover: " + url); } else if(!linkedTags.isEmpty()){ var url = linkedTags.stream() .filter(tag -> tag.feature_image() != null) .map(tag -> tag.feature_image()) .findFirst() .get(); if (url.startsWith("__GHOST_URL__")) { url = downloadAsset(url.replace("__GHOST_URL__", domain)); } writer.println("cover: " + url); } if (post.status.equals("draft")) { writer.println("draft: true"); } if (!linkedTags.isEmpty() || post.featured() > 0) { writer.println("tags:"); for (var tag : linkedTags) { writer.println(" - slug: " + tag.slug()); writer.println(" title: \"" + tag.name() + "\""); } } if (!linkedAuthors.isEmpty()) { writer.println("authors:"); for (var author : linkedAuthors) { writer.println(" - slug: " + author.slug()); writer.println(" name: \"" + author.name() + "\""); } } writer.println("---"); writer.println(); writer.println(title); writer.println(); var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)"); var internalLinks = internalLinkPattern.matcher(markdown); markdown = internalLinks.replaceAll(result -> { var link = result.group(1); var anchor = result.group(2); if (link.startsWith("/content") || link.startsWith("/api")) { return "(" + downloadAsset(domain + link) + ")"; } if (link.startsWith("/")) { link = link.substring(1); } if (link.endsWith("/")) { link = link.substring(0, link.length() -1); } if (anchor != null) { return "(" + link + ".md" + "#" +anchor + ")"; } System.out.println(link); return "(" + link + ".md)"; }); if(markdown.contains("__GHOST_URL__")) { System.out.println("Failed to replace all links in markdown in " + filename); System.out.println(markdown); } writer.println(markdown); } } return 0; } @JsonIgnoreProperties(ignoreUnknown = true) record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){} @JsonIgnoreProperties(ignoreUnknown = true) record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){ public Optional title() { return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description)); } public Optional image() { return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image)); } } @JsonIgnoreProperties(ignoreUnknown = true) record Tag(String id, String slug, String name, String description, String feature_image){} @JsonIgnoreProperties(ignoreUnknown = true) record TagLink(String post_id, String tag_id){} @JsonIgnoreProperties(ignoreUnknown = true) record Author(String id, String slug, String name){} @JsonIgnoreProperties(ignoreUnknown = true) record AuthorLink(String post_id, String author_id){} public static void main(String[] args) { final int exitCode = new CommandLine(new ghost2md()).execute(args); System.exit(exitCode); } private String downloadAsset(String uriString) { var assetDir = new File(destinationDir, "assets"); if (!assetDir.exists()) { assetDir.mkdirs(); } var uri = URI.create(uriString); var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString()); if (file.exists()) { return "assets/" + file.getName(); } System.out.println("Downloading " + uriString); try (var out = new FileOutputStream(file)) { var conn = uri.toURL().openConnection(); try (var stream = conn.getInputStream()) { stream.transferTo(out); } } catch (IOException e) { System.err.println("Failed to download file " + uriString + ". " + e.getMessage()); return uriString; } return "assets/" + file.getName(); } }