defmodule Grazer.HTML do
  @moduledoc """
  Distills an HTML document into a flat list of simple content nodes.

  Each node in the output is either a plain string or a three-element list
  `[tag, attrs, children]`, where `tag` is one of `:h1`, `:p` or `:a`.
  """

  @typedoc "A distilled content node: a string or a `[tag, attrs, children]` list."
  @type content :: String.t() | [term(), ...]

  # Container elements that produce no node of their own; their children are
  # spliced directly into the surrounding output.
  @divvy_tags ~w(aside blockquote div footer header main nav noscript section span)

  @doc """
  Parses `raw` with `Floki.parse_document/1` and flattens the children of its
  `<body>` element.

  Returns `{:ok, nodes}` when exactly one `<body>` is found. Returns
  `{:error, :no_body}` when none is found and `{:error, :multiple_bodies}` when
  more than one is found. Any non-`{:ok, _}` result from the parser is returned
  unchanged.
  """
  @spec distill(binary()) :: {:ok, [content()]} | {:error, term()}
  def distill(raw) when is_binary(raw) do
    with {:ok, parsed} <- Floki.parse_document(raw) do
      case Floki.find(parsed, "body") do
        [{_tag, _attrs, children}] -> {:ok, flatten(children)}
        [] -> {:error, :no_body}
        [_ | _] -> {:error, :multiple_bodies}
      end
    end
  end

  # Converts a list of parsed HTML nodes into a flat list of content nodes,
  # preserving document order.
  defp flatten(children), do: Enum.flat_map(children, &convert/1)

  # Converts a single parsed node into zero or more content nodes.
  defp convert({:comment, _}), do: []

  defp convert({"script", _attrs, _children}), do: []

  # NOTE(review): h2 and h3 are emitted as :h1, exactly as before this rewrite.
  # Confirm whether that is intentional before distinguishing heading levels.
  defp convert({tag, _attrs, children}) when tag in ["h1", "h2", "h3"] do
    [[:h1, [], [text(children)]]]
  end

  defp convert({"br", _attrs, _children}), do: [[:p, [], [""]]]

  defp convert({"hr", _attrs, _children}), do: [[:p, [], ["———"]]]

  defp convert({tag, _attrs, children}) when tag in ["ul", "ol"], do: list(children)

  # Inline fragments are kept as-is; to separate them with spaces, wrap the
  # flattened children in Enum.intersperse(..., " ").
  defp convert({"p", _attrs, children}), do: [[:p, [], flatten(children)]]

  defp convert({"a", attrs, children}) do
    [[:a, [href: attr(attrs, "href")], [text(children)]]]
  end

  defp convert({tag, _attrs, children}) when tag in ["strong", "b"] do
    ["**" <> text(children) <> "**"]
  end

  defp convert({tag, _attrs, children}) when tag in ["em", "i"] do
    ["__" <> text(children) <> "__"]
  end

  defp convert(str) when is_binary(str), do: [text([str])]

  defp convert({tag, _attrs, children}) when tag in @divvy_tags, do: flatten(children)

  # Any other element or node kind is ignored rather than crashing the whole
  # distillation.
  defp convert(_other), do: []

  # Concatenates the text found in `nodes`, descending into nested elements.
  defp text(nodes), do: Enum.map_join(nodes, &node_text/1)

  defp node_text({_tag, _attrs, children}) when is_list(children), do: text(children)

  # A run of leading whitespace in a text node is collapsed to a single space.
  defp node_text(str) when is_binary(str), do: Regex.replace(~r/\A\s+/m, str, " ")

  # Comments and other non-text nodes contribute no text.
  defp node_text(_other), do: ""

  # Turns each `<li>` child into a bulleted paragraph. Anything else between
  # the items (whitespace text nodes, comments) is skipped.
  defp list(items) do
    for {"li", _attrs, children} <- items do
      [:p, [], [" · " | flatten(children)]]
    end
  end

  # Returns the value stored under `name` in a list of `{name, value}`
  # attribute pairs, or `default` when it is absent.
  defp attr(attrs, name, default \\ nil) do
    case List.keyfind(attrs, name, 0) do
      {_name, value} -> value
      _ -> default
    end
  end
end