# adrianshort / gist-e114d3f07ff026e844c5
# Mirrored from https://gist.github.com/e114d3f07ff026e844c5.git


			
				
					
						
						
							
							# Convert Jekyll blog posts to DokuWiki pages
# Adrian Short (https://adrianshort.org/) 15 Feb 2015

require 'fileutils'
require 'yaml'
require 'pp'
require 'pandoc-ruby'

INPUT_DIR = "./_posts"
OUTPUT_BASEDIR = "./blog"

unpublished_files = [] # collect drafts, i.e. files where published == false

# Rewrites the raw-HTML wrappers found in the converted text into DokuWiki
# markup.
#
# @param text [String] DokuWiki text to clean up
# @return [String] a new string; the argument is not modified
def fix_dokuwiki_markup(text)
  # convert <!-- more --> tags
  fixed = text.gsub(/<HTML>\n<!-- more -->\n<\/HTML>/, '===== =====')

  # fix blockquotes (I'm using the DokuWiki blockquote plugin)
  # The capture is non-greedy so that every blockquote is rewritten on its
  # own; a greedy match would run from the first opening wrapper to the last
  # closing one and leave the wrappers in between untouched.
  fixed.gsub(%r{<HTML><blockquote>\n(.+?)</blockquote></HTML>}m,
             "<blockquote>\n\\1</blockquote>\n")
end

# Builds the DokuWiki tag line from a post's merged tags and categories.
#
# @param metadata [Hash] front matter; the 'tags' and 'categories' keys are read
# @return [String, nil] "{{tag>...}}\n", or nil when the post has neither tags
#   nor categories
def dokuwiki_tag_line(metadata)
  # Array() copes with a missing key (nil) and with a single scalar value,
  # and `+` builds a new array instead of mutating the metadata hash.
  tags = (Array(metadata['tags']) + Array(metadata['categories']))
         .map(&:to_s).uniq
  return if tags.empty?

  # wrap tags containing spaces in double quotes
  quoted = tags.map { |t| t.include?(' ') ? "\"%s\"" % t : t }
  "{{tag>%s}}\n" % quoted.sort_by(&:downcase).join(' ')
end

# Derives the output page file name from a post's path by dropping the
# YYYY-MM-DD- prefix and swapping the extension for .txt.
#
# @param path [String] path of the source Markdown file
# @return [String] file name (no directory) ending in ".txt"
def output_basename(path)
  name = File.basename(path)
  dated = name.match(/\d{4}-\d\d-\d\d-(.+)\./)
  # Fall back to the plain base name when the file carries no date prefix.
  (dated ? dated[1] : File.basename(name, '.*')) + '.txt'
end

# Loop through Markdown files
Dir.glob(File.join(INPUT_DIR, "*.{md,markdown}")) do |fn|
  # File.read closes the handle itself, including when a draft is skipped below
  contents = File.read(fn)
  # NOTE(review): with Psych 4+ YAML.load is safe-by-default and may refuse
  # Date/Time values in the front matter - confirm against the Ruby in use.
  metadata = YAML.load(contents)

  # skip drafts
  if metadata['published'] == false
    unpublished_files << metadata['title']
    next
  end

  output = "====== %s ======\n\n" % metadata['title']
  output += PandocRuby.convert(contents, :from => :markdown, :to => :dokuwiki)
  output = fix_dokuwiki_markup(output)

  # Merge categories and tags; nothing is appended for untagged posts
  tag_line = dokuwiki_tag_line(metadata)
  output += tag_line if tag_line

  # write to new file, grouped into one directory per year
  date = metadata['date']
  out_dir = File.join(OUTPUT_BASEDIR, date.year.to_s)
  FileUtils.mkdir_p out_dir

  out_fn = File.join(out_dir, output_basename(fn))
  File.write(out_fn, output)

  # Set the modified and last access time for the file
  # Use `cp -p` to preserve these times when copying
  # File.utime needs a Time (or a number); the front-matter value may have
  # been loaded as a Date, so convert it when a conversion is available.
  stamp = date.respond_to?(:to_time) ? date.to_time : date
  File.utime(stamp, stamp, out_fn)
end

puts "Drafts skipped:"
unpublished_files.each { |title| puts title }