129 lines
3.7 KiB
Ruby
129 lines
3.7 KiB
Ruby
require "time"
|
|
|
|
module Picopackage
|
|
class Provider
  # Selects a provider for +url+ by asking each class in PROVIDERS, in order.
  #
  # A provider class answers `handles_url?(url)` with one of:
  # * `true`   - it is instantiated and returned immediately;
  # * `:maybe` - it is instantiated and returned only if the instance's
  #              `handles_body?` is truthy;
  # * anything else (including `false`) - it is skipped.
  #
  # Returns the provider instance, or nil when no provider accepts the URL.
  def self.for(url)
    PROVIDERS.each do |candidate|
      verdict = candidate.handles_url?(url)

      # Identity checks on purpose: only the literal `true` / `:maybe` count,
      # other truthy answers are treated as "not handled".
      return candidate.new(url) if verdict.equal?(true)
      next unless verdict.equal?(:maybe)

      probe = candidate.new(url)
      return probe if probe.handles_body?
    end

    nil
  end
end
|
|
|
|
# Base class for fetching content from a URL
|
|
# The variable `body` will contain the package_data retrieved from the URL
|
|
# The variable `package_data` will contain both the payload and the metadata - this is what would be written to a file.
|
|
# The variable `payload` will contain the payload extracted from `package_data`
|
|
# The variable `metadata` will contain the metadata extracted from `package_data`
|
|
|
|
# Job of the Provider class is to fetch the body from the URL, and then extract the package_data
|
|
# and the filename from the body. The SourceFile class will then take the body and split it into payload and metadata
|
|
|
|
# Generic provider: downloads the URL over HTTP(S) and treats the whole
# response body as the package content.
class DefaultProvider
  # Upper bound, in bytes, on the response body accepted by #fetch (1 MiB).
  MAX_SIZE = 1024 * 1024
  # Seconds, used for both the open and the read timeout of the HTTP connection.
  TIMEOUT = 10

  attr_reader :url

  # The default provider cannot tell from the URL alone; Provider.for then
  # consults #handles_body? on an instance.
  def self.handles_url?(url) = :maybe

  # @param url [String] the URL as given by the caller; it is passed through
  #   #transform_url before being stored and parsed.
  def initialize(url)
    @url = transform_url(url)
    @uri = URI(@url)
    @body = nil
    @content = nil
  end

  # The response body, fetched on first use and memoized.
  def body = @body ||= fetch

  # The body parsed as JSON (memoized). Raises JSON::ParserError on invalid JSON.
  def json_body = @json_body ||= JSON.parse(body)

  # Hook for subclasses to rewrite the user-facing URL into the URL that is
  # actually requested. The default is the identity.
  def transform_url(url) = url

  # Downloads the body from the (transformed) URL.
  #
  # Raises RuntimeError ("Unexpected response: <code>") when the response is
  # not a Net::HTTPSuccess, and FileTooLargeError when the body would exceed
  # MAX_SIZE bytes.
  #
  # @return [String] the downloaded body, also stored in @body
  def fetch
    Net::HTTP.start(
      @uri.host, @uri.port,
      use_ssl: @uri.scheme == "https", read_timeout: TIMEOUT, open_timeout: TIMEOUT
    ) do |http|
      http.request_get(request_target) do |response|
        raise "Unexpected response: #{response.code}" unless response.is_a?(Net::HTTPSuccess)

        # Accumulate into a local buffer so that a failure part-way through
        # never leaves a truncated string memoized in @body.
        buffer = String.new(capacity: MAX_SIZE)
        response.read_body do |chunk|
          if buffer.bytesize + chunk.bytesize > MAX_SIZE
            raise FileTooLargeError, "Response would exceed #{MAX_SIZE} bytes"
          end

          buffer << chunk
        end
        @body = buffer
      end
    end

    @body
  end

  # Always true, and does not fetch anything: this provider accepts any body.
  def handles_body?
    true
  end

  # Implement in subclass - this comes from the `body`.
  # Splitting content into payload and metadata is the job of the SourceFile class.
  def content = body

  # Implement in subclass - this should return the filename extracted from the body - if it exists, but not from the metadata
  def filename = File.basename @url

  # Implement in subclass when the source exposes a packaging/creation time.
  # nil means "unknown"; #source_file drops nil metadata entries via `compact`.
  def packaged_at = nil

  # Builds (once) the SourceFile for this provider's content, passing along
  # whichever of filename / url / packaged_at are non-nil as metadata.
  def source_file
    @source_file ||= SourceFile.from_content(content, metadata: {"filename" => filename, "url" => url, "packaged_at" => packaged_at}.compact)
  end

  private

  # Path plus query string to put on the request line. Falls back to "/" when
  # the URL has no path (e.g. "https://example.com").
  def request_target
    path = @uri.path.to_s
    path = "/" if path.empty?
    @uri.query ? "#{path}?#{@uri.query}" : path
  end
end
|
|
|
|
# Provider for gists on gist.github.com. The gist page URL is rewritten to the
# api.github.com gists endpoint and the JSON response is read from there.
class GithubGistProvider < DefaultProvider
  # True for any URL containing "gist.github.com".
  def self.handles_url?(url) = url.match?(%r{gist\.github\.com})

  # Content of the first file listed in the gist, or nil if there is none.
  def content = first_file&.dig("content")

  # Filename of the first file listed in the gist, or nil if there is none.
  def filename = first_file&.dig("filename")

  # Maps "gist.github.com/<user>/<hex id>" to the API URL for that gist.
  # NOTE: when the URL does not have that shape the capture is nil and the
  # result is the bare ".../gists/" URL.
  def transform_url(url)
    gist_id = url[%r{gist\.github\.com/[^/]+/([a-f0-9]+)}, 1]
    "https://api.github.com/gists/#{gist_id}"
  end

  # The gist's "created_at" value parsed into a Time.
  # Returns nil when the field is absent, not a String, or not parseable
  # (Time.parse raises TypeError for nil, which a bare ArgumentError rescue
  # would not catch).
  def packaged_at
    created_at = json_body["created_at"]
    Time.parse(created_at) if created_at.is_a?(String)
  rescue ArgumentError
    nil
  end

  private

  # First entry of the "files" object in the API response, or nil when the
  # object is missing or empty.
  def first_file = json_body["files"]&.values&.first
end
|
|
|
|
# Provider for OpenGist-style hosts: requests "<url>.json" and reads the first
# entry of the "files" array in the JSON response.
class OpenGistProvider < DefaultProvider
  # Must be a class method: Provider.for calls `handles_url?` on the class.
  # The URL alone is not conclusive, so the decision is deferred to #handles_body?.
  def self.handles_url?(url) = :maybe

  def transform_url(url) = "#{url}.json"

  # "content" of the first file in the response, or nil.
  def content = file_field("content")

  # "filename" of the first file in the response, or nil.
  def filename = file_field("filename")

  # If we successfully fetch the body, and the body contains content and a filename, then we can handle the body.
  # Any fetch failure, or a body that is not JSON of the expected shape, means
  # "not ours" (falsy) so that Provider.for can move on to the next provider.
  def handles_body?
    content && filename
  rescue FileTooLargeError, Net::HTTPError, JSON::ParserError, TypeError, RuntimeError
    false
  end

  private

  # Looks up +key+ in the first element of the top-level "files" array.
  # Returns nil when the JSON document is not an object.
  def file_field(key)
    json_body.dig("files", 0, key) if json_body.is_a?(Hash)
  end
end
|
|
|
|
# Provider classes in the order Provider.for consults them. Order matters:
# the first class that accepts the URL (or its body) wins.
PROVIDERS = [GithubGistProvider, OpenGistProvider, DefaultProvider].freeze
|
|
end
|