|
- # Convert Jekyll blog posts to DokuWiki pages
- # Adrian Short (https://adrianshort.org/) 15 Feb 2015
-
- require 'fileutils'
- require 'yaml'
- require 'pp'
- require 'pandoc-ruby'
-
require 'time' # Time.parse; also loads Date, which YAML yields for date-only values

INPUT_DIR = "./_posts"
OUTPUT_BASEDIR = "./blog"

# Jekyll post file name: YYYY-MM-DD-title.ext
# Captures: 1 = year, 2 = month, 3 = day, 4 = title slug (may itself contain dots).
POST_FILENAME = /\A(\d{4})-(\d\d)-(\d\d)-(.+)\.[^.]+\z/

# Parse the YAML front matter of a post.
#
# contents - the full text of the post file (front matter followed by Markdown).
#
# Only the first YAML document in the text is loaded, so the Markdown body
# after the closing `---` is not interpreted as YAML.
# Date, Time and Symbol are permitted explicitly because the default loader on
# Psych 4 (Ruby 3.1+) rejects them, which breaks any post with a `date:` field.
# NOTE: the `permitted_classes:` keyword needs Psych 3.1+ (Ruby 2.6+).
#
# Returns the front matter Hash, or nil when the text does not start with one.
def load_front_matter(contents)
  data = YAML.safe_load(contents, permitted_classes: [Date, Time, Symbol])
  data if data.is_a?(Hash)
end

# Normalise a front matter `tags`/`categories` value into an Array of Strings.
# Jekyll accepts either a YAML list or a single space-separated string.
def normalize_terms(value)
  terms = value.is_a?(String) ? value.split : Array(value)
  terms.map(&:to_s)
end

# Build the DokuWiki tag plugin line from the post's tags and categories.
#
# Tags and categories are merged and de-duplicated; tags containing spaces are
# wrapped in double quotes; the result is sorted case-insensitively.
# The metadata Hash is not modified.
#
# Returns "{{tag>...}}\n", or an empty String when the post has no tags at all
# (so no empty tag element is written to the page).
def tag_line(metadata)
  tags = %w[tags categories].flat_map { |key| normalize_terms(metadata[key]) }.uniq
  return '' if tags.empty?

  quoted = tags.map { |t| t.include?(' ') ? "\"#{t}\"" : t }
  format("{{tag>%s}}\n", quoted.sort_by(&:downcase).join(' '))
end

# Clean up raw-HTML passthrough blocks left in pandoc's DokuWiki output.
#
# - <!-- more --> markers become an empty level-2 heading.
# - Blockquotes are unwrapped from <HTML> so the DokuWiki blockquote plugin
#   can render them. The match is non-greedy so each blockquote is handled on
#   its own; a greedy match would swallow everything between the first and
#   the last blockquote of the page.
#
# Returns a new String; the argument is left untouched.
def tidy_dokuwiki(markup)
  markup
    .gsub(%r{<HTML>\n<!-- more -->\n</HTML>}, '===== =====')
    .gsub(%r{<HTML><blockquote>\n(.+?)</blockquote></HTML>}m, "<blockquote>\n\\1</blockquote>\n")
end

# Work out the timestamp of a post.
#
# metadata - front matter Hash; its 'date' may be a Time, a Date, a String or absent.
# basename - file name of the post; must match POST_FILENAME when 'date' is absent.
#
# File.utime needs a Time, so Date values are converted (local midnight) and
# String values are parsed. Without a 'date' entry the date embedded in the
# file name is used.
#
# Returns a Time.
def post_time(metadata, basename)
  value = metadata['date']
  case value
  when Time then value
  when Date then value.to_time
  when String then Time.parse(value)
  else
    year, month, day = basename.match(POST_FILENAME).captures.first(3).map(&:to_i)
    Time.local(year, month, day)
  end
end

unpublished_files = [] # collect drafts, i.e. files where published == false

# Loop through Markdown files
Dir.glob(File.join(INPUT_DIR, "*.{md,markdown}")) do |fn|
  # File.read closes the handle itself, including when the post is skipped below
  contents = File.read(fn)
  metadata = load_front_matter(contents)

  unless metadata
    warn "Skipping #{fn}: no YAML front matter found"
    next
  end

  # skip drafts (only an explicit `published: false` counts; a missing key means published)
  if metadata['published'] == false
    unpublished_files << metadata['title']
    next
  end

  basename = File.basename(fn)
  name = basename.match(POST_FILENAME)
  unless name
    warn "Skipping #{fn}: file name is not of the form YYYY-MM-DD-title.ext"
    next
  end

  # pandoc's Markdown reader receives the whole file, front matter included
  page = format("====== %s ======\n\n", metadata['title'])
  page << PandocRuby.convert(contents, :from => :markdown, :to => :dokuwiki)
  page = tidy_dokuwiki(page) + tag_line(metadata)

  # write to new file, grouped into one directory per year
  written_at = post_time(metadata, basename)
  out_dir = File.join(OUTPUT_BASEDIR, written_at.year.to_s)
  FileUtils.mkdir_p(out_dir)

  out_fn = File.join(out_dir, "#{name[4]}.txt")
  File.write(out_fn, page)

  # Set the modified and last access time for the file
  # Use `cp -p` to preserve these times when copying
  File.utime(written_at, written_at, out_fn)
end

puts "Drafts skipped:"
unpublished_files.each { |title| puts title }
|