mirror of
https://github.com/dkam/probot.git
synced 2025-12-28 09:14:53 +00:00
Add code and tests to handle a disallow clause with no value
This commit is contained in:
@@ -31,6 +31,7 @@ class Probot
|
|||||||
@current_agents = ["*"]
|
@current_agents = ["*"]
|
||||||
@current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
|
@current_agents.each { |agent| @rules[agent] ||= {"disallow" => [], "allow" => [], "crawl_delay" => 0} }
|
||||||
@sitemaps = []
|
@sitemaps = []
|
||||||
|
|
||||||
@site = URI(data) if data.start_with?("http")
|
@site = URI(data) if data.start_with?("http")
|
||||||
@doc = @site.nil? ? data : fetch_robots_txt(@site)
|
@doc = @site.nil? ? data : fetch_robots_txt(@site)
|
||||||
parse(@doc)
|
parse(@doc)
|
||||||
@@ -92,7 +93,9 @@ class Probot
|
|||||||
|
|
||||||
# All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
|
# All Regex characters are escaped, then we unescape * and $ as they may be used in robots.txt
|
||||||
if data.allow? || data.disallow?
|
if data.allow? || data.disallow?
|
||||||
@current_agents.each { |agent| rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) }
|
@current_agents.each do |agent|
|
||||||
|
rules[agent][data.key] << Regexp.new(Regexp.escape(data.value).gsub('\*', ".*").gsub('\$', "$")) unless data.value.nil?
|
||||||
|
end
|
||||||
|
|
||||||
# When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
|
# When user-agent strings are found on consecutive lines, they are considered to be part of the same record. Google ignores crawl_delay.
|
||||||
subsequent_agent = false
|
subsequent_agent = false
|
||||||
@@ -128,6 +131,8 @@ class Probot
|
|||||||
|
|
||||||
def clean_value = raw_value.split("#").first&.strip
|
def clean_value = raw_value.split("#").first&.strip
|
||||||
|
|
||||||
|
def clean_url = clean_value&.then { URI(_1).to_s }
|
||||||
|
|
||||||
def agent? = key == "user-agent"
|
def agent? = key == "user-agent"
|
||||||
|
|
||||||
def disallow? = key == "disallow"
|
def disallow? = key == "disallow"
|
||||||
@@ -140,11 +145,13 @@ class Probot
|
|||||||
|
|
||||||
def value
|
def value
|
||||||
return clean_value.to_f if crawl_delay?
|
return clean_value.to_f if crawl_delay?
|
||||||
return URI(clean_value).to_s if disallow? || allow?
|
return clean_url if disallow? || allow?
|
||||||
|
|
||||||
raw_value
|
raw_value
|
||||||
rescue URI::InvalidURIError
|
rescue URI::InvalidURIError
|
||||||
raw_value
|
raw_value
|
||||||
|
rescue ArgumentError
|
||||||
|
raw_value
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -29,4 +29,5 @@ Gem::Specification.new do |spec|
|
|||||||
spec.bindir = "exe"
|
spec.bindir = "exe"
|
||||||
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
||||||
spec.require_paths = ["lib"]
|
spec.require_paths = ["lib"]
|
||||||
|
spec.add_development_dependency "debug"
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -95,6 +95,19 @@ class TestProbot < Minitest::Test
|
|||||||
crawl_delay: 0
|
crawl_delay: 0
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
txt: %("User-agent: *\nDisallow: /wp/wp-admin/\nAllow: /wp/wp-admin/admin-ajax.php\n\nUser-agent: *\nDisallow: /wp-content/uploads/wpo/wpo-plugins-tables-list.json\n\n# START YOAST BLOCK\n# ---------------------------\nUser-agent: *\nDisallow:\n\nSitemap: https://prhinternationalsales.com/sitemap_index.xml\n# ---------------------------\n# END YOAST BLOCK"),
|
||||||
|
sitemaps: ["https://prhinternationalsales.com/sitemap_index.xml"],
|
||||||
|
found_agents: ["*"],
|
||||||
|
tests: [
|
||||||
|
{
|
||||||
|
agent: "*",
|
||||||
|
allowed: ["/wp/wp-admin/admin-ajax.php"],
|
||||||
|
disallowed: ["/wp/wp-admin/", "/wp-content/uploads/wpo/wpo-plugins-tables-list.json"],
|
||||||
|
crawl_delay: 0
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
].freeze
|
].freeze
|
||||||
|
|
||||||
@@ -132,6 +145,7 @@ class TestProbot < Minitest::Test
|
|||||||
def test_empty_allow_disallow
|
def test_empty_allow_disallow
|
||||||
assert Probot.new(%(User-agent: *\nAllow:)).rules.dig("*", "allow").empty?
|
assert Probot.new(%(User-agent: *\nAllow:)).rules.dig("*", "allow").empty?
|
||||||
assert Probot.new(%(User-agent: *\nDisallow:)).rules.dig("*", "disallow").empty?
|
assert Probot.new(%(User-agent: *\nDisallow:)).rules.dig("*", "disallow").empty?
|
||||||
|
assert Probot.new(%(User-agent: *\nDisallow:\n\n)).rules.dig("*", "disallow").empty?
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_consecutive_user_agents
|
def test_consecutive_user_agents
|
||||||
|
|||||||
Reference in New Issue
Block a user