///usr/bin/env jbang "$0" "$@" ; exit $?
//
// This is a JBang script. You need JBang to run this script.
//
// To learn more go to https://www.jbang.dev and install JBang.
//
// Run this script:
// ./ghost2md.java -f <export.json> -d <destination-dir>
//
//DEPS info.picocli:picocli:4.7.7
//DEPS com.jayway.jsonpath:json-path:2.8.0
//DEPS com.fasterxml.jackson.core:jackson-databind:2.19.0
//DEPS io.github.furstenheim:copy_down:1.1
//JAVA 21+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.ParseContext;
import com.jayway.jsonpath.TypeRef;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import io.github.furstenheim.CopyDown;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import static java.util.Optional.ofNullable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Command(name = "ghost2md", mixinStandardHelpOptions = true, version = "0.1", description = "Converts a Ghost export file to markdown files")
public class ghost2md implements Callable {
private static final ParseContext JSON = JsonPath.using(Configuration.builder().mappingProvider(new JacksonMappingProvider()).build());
private static final CopyDown HTML_TO_MD = new CopyDown();
@Option(names = { "--file", "-f" }, required = true, description = "The exported Ghost JSON file")
private File exportFile;
@Option(names = { "--dir", "-d" }, defaultValue = "exported", description = "The destination directory")
private File destinationDir;
@Option(names = { "--type", "-t" }, defaultValue = "all", description = "Type of entries to export (all,page,post)")
private String type;
@Option(names = { "--domain" }, defaultValue = "", description = "The domain of Ghost blog (used to download linked resources)")
private String domain;
@Override
public Integer call() throws Exception {
if (!exportFile.exists()) {
System.err.printf("%s does not exist%n", exportFile.getAbsolutePath());
return 404;
}
destinationDir.mkdirs();
var json = JSON.parse(exportFile);
var posts = json.read("$.db[*].data.posts[*]", new TypeRef>(){});
var postMeta = json.read("$.db[*].data.posts_meta[*]", new TypeRef>(){});
var tags = json.read("$.db[*].data.tags[*]", new TypeRef>(){});
var tagLinks = json.read("$.db[*].data.posts_tags[*]", new TypeRef>(){});
var authors = json.read("$.db[*].data.users[*]", new TypeRef>(){});
var authorLinks = json.read("$.db[*].data.posts_authors[*]", new TypeRef>(){});
for (var post : posts) {
if (!"all".equals(type) && !post.type().equals(type)) {
continue;
}
var filename = post.slug() + ".md";
var markdown = HTML_TO_MD.convert(post.html());
var file = new File(destinationDir, filename);
var title = "# " + post.title();
var meta = postMeta.stream()
.filter(link -> link.post_id().equals(post.id()))
.findFirst();
var linkedTags = tagLinks.stream()
.filter(link -> link.post_id().equals(post.id()))
.flatMap(link -> tags.stream().filter(tag -> tag.id().equals(link.tag_id())))
.toList();
var linkedAuthors = authorLinks.stream()
.filter(link -> link.post_id().equals(post.id()))
.flatMap(link -> authors.stream().filter(author -> author.id().equals(link.author_id())))
.toList();
System.out.printf("Writing %s\n", file);
try (var writer = new PrintWriter(new FileWriter(file), true)) {
writer.println("---");
writer.println("date:");
ofNullable(post.created_at()).ifPresent(date -> writer.println(" created: " + date));
ofNullable(post.updated_at()).ifPresent(date -> writer.println(" updated: " + date));
ofNullable(post.published_at()).ifPresent(date -> writer.println(" published: " + date));
if (post.custom_excerpt() != null) {
writer.println("excerpt: \"" + post.custom_excerpt() + "\"");
} else if(meta.isPresent() && meta.get().title().isPresent()) {
writer.println("excerpt: \"" + meta.get().title().get() + "\"");
} else {
writer.println("excerpt: \"" + post.title() + "\"");
}
if (post.featured() > 0) {
writer.println("pin: true");
}
if (post.feature_image() != null) {
var url = post.feature_image();
if (url.startsWith("__GHOST_URL__")) {
url = downloadAsset(url.replace("__GHOST_URL__", domain));
}
writer.println("cover: " + url);
} else if (meta.isPresent() && meta.get().image().isPresent()) {
var url = meta.get().image().get();
if (url.startsWith("__GHOST_URL__")) {
url = downloadAsset(url.replace("__GHOST_URL__", domain));
}
writer.println("cover: " + url);
} else if(!linkedTags.isEmpty()){
var url = linkedTags.stream()
.filter(tag -> tag.feature_image() != null)
.map(tag -> tag.feature_image())
.findFirst()
.get();
if (url.startsWith("__GHOST_URL__")) {
url = downloadAsset(url.replace("__GHOST_URL__", domain));
}
writer.println("cover: " + url);
}
if (post.status.equals("draft")) {
writer.println("draft: true");
}
if (!linkedTags.isEmpty() || post.featured() > 0) {
writer.println("tags:");
for (var tag : linkedTags) {
writer.println(" - slug: " + tag.slug());
writer.println(" title: \"" + tag.name() + "\"");
}
}
if (!linkedAuthors.isEmpty()) {
writer.println("authors:");
for (var author : linkedAuthors) {
writer.println(" - slug: " + author.slug());
writer.println(" name: \"" + author.name() + "\"");
}
}
writer.println("---");
writer.println();
writer.println(title);
writer.println();
var internalLinkPattern = Pattern.compile("\\(__GHOST_URL__(.*)(#(.*))?\\)");
var internalLinks = internalLinkPattern.matcher(markdown);
markdown = internalLinks.replaceAll(result -> {
var link = result.group(1);
var anchor = result.group(2);
if (link.startsWith("/content") || link.startsWith("/api")) {
return "(" + downloadAsset(domain + link) + ")";
}
if (link.startsWith("/")) {
link = link.substring(1);
}
if (link.endsWith("/")) {
link = link.substring(0, link.length() -1);
}
if (anchor != null) {
return "(" + link + ".md" + "#" +anchor + ")";
}
System.out.println(link);
return "(" + link + ".md)";
});
if(markdown.contains("__GHOST_URL__")) {
System.out.println("Failed to replace all links in markdown in " + filename);
System.out.println(markdown);
}
writer.println(markdown);
}
}
return 0;
}
@JsonIgnoreProperties(ignoreUnknown = true)
record Post(String id, String slug, String title, String html, String created_at, String updated_at, String published_at, String type, int featured, String feature_image, String status, String custom_excerpt){}
@JsonIgnoreProperties(ignoreUnknown = true)
record PostMeta(String post_id, String meta_title, String meta_description, String twitter_image, String og_image){
public Optional title() {
return Optional.ofNullable(meta_title).or(() -> Optional.ofNullable(meta_description));
}
public Optional image() {
return Optional.ofNullable(og_image).or(() -> Optional.ofNullable(twitter_image));
}
}
@JsonIgnoreProperties(ignoreUnknown = true)
record Tag(String id, String slug, String name, String description, String feature_image){}
@JsonIgnoreProperties(ignoreUnknown = true)
record TagLink(String post_id, String tag_id){}
@JsonIgnoreProperties(ignoreUnknown = true)
record Author(String id, String slug, String name){}
@JsonIgnoreProperties(ignoreUnknown = true)
record AuthorLink(String post_id, String author_id){}
public static void main(String[] args) {
final int exitCode = new CommandLine(new ghost2md()).execute(args);
System.exit(exitCode);
}
private String downloadAsset(String uriString) {
var assetDir = new File(destinationDir, "assets");
if (!assetDir.exists()) {
assetDir.mkdirs();
}
var uri = URI.create(uriString);
var file = new File(assetDir, Paths.get(uri.getPath()).getFileName().toString());
if (file.exists()) {
return "assets/" + file.getName();
}
System.out.println("Downloading " + uriString);
try (var out = new FileOutputStream(file)) {
var conn = uri.toURL().openConnection();
try (var stream = conn.getInputStream()) {
stream.transferTo(out);
}
} catch (IOException e) {
System.err.println("Failed to download file " + uriString + ". " + e.getMessage());
return uriString;
}
return "assets/" + file.getName();
}
}