picopackage/lib/picopackage/provider.rb

require "time"

module Picopackage
  # Entry point for choosing which provider class should handle a URL.
  class Provider
    # Walks PROVIDERS in order and returns an instance of the first provider
    # that accepts the URL, or nil when none does.
    #
    # Each provider class answers `handles_url?(url)` with:
    #   true   - accept immediately
    #   :maybe - build an instance and let `handles_body?` decide
    #   anything else (e.g. false) - skip this provider
    def self.for(url)
      PROVIDERS.each do |candidate|
        case candidate.handles_url?(url)
        when :maybe
          probe = candidate.new(url)
          return probe if probe.handles_body?
        when true
          return candidate.new(url)
        end
      end

      nil
    end
  end

  # Base class for fetching content from a URL
  # The variable `body` will contain the package_data retrieved from the URL
  # The variable `package_data` will contain both the payload and the metadata - this is what would be written to a file.
  # The variable `payload` will contain the payload extracted from `package_data`
  # The variable `metadata` will contain the metadata extracted from `package_data`

  # The job of the Provider class is to fetch the body from the URL, and then extract the package_data
  # and the filename from the body. The SourceFile class will then take the body and split it into payload and metadata.

  # Generic provider: the response body at the URL is used directly as the content.
  # Subclasses override transform_url / content / filename / packaged_at /
  # handles_body? for hosts that wrap the content in another format.
  class DefaultProvider
    # Largest response body, in bytes, that fetch will accept.
    MAX_SIZE = 1024 * 1024
    # Passed to Net::HTTP as both open_timeout and read_timeout.
    TIMEOUT = 10
    attr_reader :url

    # Answer consumed by Provider.for: true, false, or :maybe. :maybe means
    # "cannot tell from the URL; build an instance and ask handles_body?".
    def self.handles_url?(url) = :maybe

    # @param url [String] URL as given by the caller; stored after transform_url.
    def initialize(url)
      @url = transform_url(url)
      @uri = URI(@url)
      @body = nil
      @content = nil
    end

    # Response body, fetched on first access and memoized.
    def body = @body ||= fetch

    # Response body parsed as JSON, memoized. Raises whatever JSON.parse raises
    # when the body is not valid JSON.
    def json_body = @json_body ||= JSON.parse(body)

    # Hook for subclasses to rewrite the user-facing URL into the URL that is fetched.
    def transform_url(url) = url

    # Performs the GET request and returns the response body.
    #
    # @return [String] the complete body
    # @raise [RuntimeError] when the response is not a Net::HTTPSuccess
    # @raise [FileTooLargeError] when the body would exceed MAX_SIZE bytes
    def fetch
      # Accumulate into a local so that a failure part-way through the stream
      # never leaves a truncated @body behind for `body` to memoize.
      buffer = String.new(capacity: MAX_SIZE)

      Net::HTTP.start(@uri.host, @uri.port, use_ssl: @uri.scheme == "https", read_timeout: TIMEOUT, open_timeout: TIMEOUT) do |http|
        # request_uri (unlike path) keeps the query string and is "/" for a
        # URL that has no path component.
        http.request_get(@uri.request_uri) do |response|
          raise "Unexpected response: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

          response.read_body do |chunk|
            if buffer.bytesize + chunk.bytesize > MAX_SIZE
              raise FileTooLargeError, "Response would exceed #{MAX_SIZE} bytes"
            end
            buffer << chunk
          end
        end
      end

      @body = buffer
    end

    # Second-stage check used by Provider.for when handles_url? answered :maybe.
    # The default provider accepts unconditionally.
    # NOTE(review): nothing in this method body can raise, so the rescue below is
    # unreachable here; it mirrors the subclass overrides that do touch the body.
    # Confirm whether this was meant to fetch the body before answering.
    def handles_body?
      true
    rescue FileTooLargeError, Net::HTTPError, RuntimeError
      false
    end

    # Override in subclasses - the content is derived from `body`.
    # Splitting content into payload and metadata is the job of the SourceFile class.
    def content = body

    # Override in subclasses - should return the filename extracted from the body, if it
    # exists, but not from the metadata. The default is the last segment of the URL path
    # (query string and fragment are not part of the path, so they are excluded).
    def filename = File.basename(@uri.path)

    # Override in subclasses that know when the content was created. The default
    # is nil, which source_file drops from the metadata via `compact`.
    def packaged_at = nil

    # SourceFile built from `content`, with filename / url / packaged_at as
    # metadata (nil entries removed). Memoized.
    def source_file
      @source_file ||= SourceFile.from_content(content, metadata: {"filename" => filename, "url" => url, "packaged_at" => packaged_at}.compact)
    end
  end

  # Provider for gists on gist.github.com, read through the api.github.com gists endpoint.
  class GithubGistProvider < DefaultProvider
    # Claims any URL containing "gist.github.com"; answers true/false, never :maybe.
    def self.handles_url?(url) = url.match?(%r{gist\.github\.com})

    # Content of the first entry under "files" in the JSON response.
    # Presumably "files" is a Hash keyed by filename - verify against the API.
    def content = json_body["files"].values.first["content"]

    # Filename of the first entry under "files" in the JSON response.
    def filename = json_body["files"].values.first["filename"]

    # Rewrites a gist page URL of the form gist.github.com/<user>/<hex id> into
    # the API URL for that gist.
    # NOTE(review): when the URL has no such segment, gist_id is nil and the
    # result is the bare ".../gists/" URL - confirm whether that should raise.
    def transform_url(url)
      gist_id = url[/gist\.github\.com\/[^\/]+\/([a-f0-9]+)/, 1]
      "https://api.github.com/gists/#{gist_id}"
    end

    # Creation time of the gist taken from "created_at", or nil when that field
    # is missing (Time.parse raises TypeError on nil) or is not a parseable
    # time string (ArgumentError).
    def packaged_at
      Time.parse(json_body["created_at"])
    rescue ArgumentError, TypeError
      nil
    end
  end

  # Provider for OpenGist-style hosts, detected by probing "<url>.json".
  class OpenGistProvider < DefaultProvider
    # Provider.for asks the class, not an instance, so this must be a class method.
    # The URL alone cannot identify an OpenGist host, hence :maybe.
    def self.handles_url?(url) = :maybe

    # The JSON representation is requested by appending ".json" to the URL.
    def transform_url(url) = "#{url}.json"

    # Content of the first entry of the "files" array, or nil when absent.
    def content = json_body.dig("files", 0, "content")

    # Filename of the first entry of the "files" array, or nil when absent.
    def filename = json_body.dig("files", 0, "filename")

    # If we successfully fetch the body, and the body contains content and a
    # filename, then we can handle the body. A failed or oversized fetch, a body
    # that is not JSON, or JSON of an unexpected shape all mean "not ours", so
    # Provider.for can move on to the next provider instead of crashing.
    def handles_body?
      return false unless json_body.is_a?(Hash)

      content && filename
    rescue FileTooLargeError, Net::HTTPError, RuntimeError, JSON::ParserError, TypeError
      false
    end
  end

  # Provider classes in the order Provider.for tries them; the first one that
  # accepts the URL wins. DefaultProvider stays last because its handles_body?
  # always answers true, so nothing after it would ever be reached.
  PROVIDERS = [GithubGistProvider, OpenGistProvider, DefaultProvider].freeze
end