# Convert Jekyll blog posts to DokuWiki pages
# Adrian Short (https://adrianshort.org/) 15 Feb 2015
#
# For every Markdown post in INPUT_DIR this script:
#   1. reads the YAML front matter (title, date, tags, categories, published),
#   2. converts the post to DokuWiki markup with Pandoc,
#   3. appends a {{tag>...}} line built from the merged tags and categories,
#   4. writes the result to OUTPUT_BASEDIR/<year>/<slug>.txt and sets the
#      file's access/modification times to the post date.
# Posts with `published: false` are skipped and their titles listed at the end.

require 'fileutils'
require 'yaml'
require 'pp'
require 'pandoc-ruby'

INPUT_DIR = "./_posts"
OUTPUT_BASEDIR = "./blog"

unpublished_files = [] # collect drafts, i.e. files where published == false

# Loop through Markdown files
Dir.glob(File.join(INPUT_DIR, "*.{md,markdown}")) do |fn|
  # File.read opens and closes the file itself, so the early `next` taken for
  # drafts can no longer leave a file handle open.
  contents = File.read(fn)

  # NOTE(review): YAML.load is kept as in the original script. Newer Psych
  # releases (4.x, bundled with Ruby 3.1+) are documented to load safely by
  # default and may reject Time/Date values such as the `date` field; if so,
  # YAML.safe_load with permitted_classes: [Date, Time] is the likely fix.
  # Confirm against the Ruby version this script is run with.
  metadata = YAML.load(contents)
  raise "#{fn}: no YAML front matter hash found" unless metadata.is_a?(Hash)

  # skip drafts
  if metadata['published'] == false
    unpublished_files << metadata['title']
    next
  end

  # Page heading followed by the Pandoc-converted body
  output = "====== %s ======\n\n" % metadata['title']
  output += PandocRuby.convert(contents, :from => :markdown, :to => :dokuwiki)

  # convert tags
  # NOTE(review): this pattern and its replacement look as if some markup was
  # lost from them (no capture group, empty heading text) — verify against the
  # intended Pandoc output before relying on it.
  output.gsub!(/\n\n<\/HTML>/, '===== =====')

  # fix blockquotes (I'm using the DokuWiki blockquote plugin)
  # NOTE(review): the opening part of this pattern and the wrapper around \1 in
  # the replacement appear to be missing (presumably blockquote tags) — TODO
  # confirm. Line breaks are written as \n escapes; the matched text is the same.
  output.gsub!(/\n\n(.+)<\/blockquote><\/HTML>/m, "\n\n\\1\n\n")

  # Merge categories and tags without mutating the arrays held in metadata.
  # to_s lets non-string YAML values (e.g. a tag written as 2015) through.
  tags = metadata['tags'] || []
  categories = metadata['categories'] || []
  merged_tags = (tags + categories).map(&:to_s).uniq

  # Only emit a tag line when there is something to tag; an Array is always
  # truthy, so the emptiness check must be explicit.
  unless merged_tags.empty?
    # wrap tags containing spaces in double quotes
    quoted = merged_tags.map { |t| t.include?(' ') ? "\"%s\"" % t : t }
    output += "{{tag>%s}}\n" % quoted.sort_by(&:downcase).join(' ')
  end

  # The slug is the filename part between the YYYY-MM-DD- prefix and the
  # final extension.
  slug_match = fn.match(/\d{4}-\d\d-\d\d-(.+)\./)
  raise "#{fn}: filename does not start with a YYYY-MM-DD- date prefix" unless slug_match

  # write to new file
  # assumes metadata['date'] was parsed into a Time — TODO confirm; it must
  # respond to #year here and be accepted by File.utime below.
  post_date = metadata['date']
  out_dir = File.join(OUTPUT_BASEDIR, post_date.year.to_s)
  FileUtils.mkdir_p out_dir
  out_fn = File.join(out_dir, slug_match[1] + '.txt')
  File.write(out_fn, output)

  # Set the modified and last access time for the file
  # Use `cp -p` to preserve these times when copying
  File.utime(post_date, post_date, out_fn)
end

puts "Drafts skipped:"
unpublished_files.each { |title| puts title }