docs: Improved documentation.
vxern committed Apr 20, 2022
1 parent 6041d79 commit 9f6100c
Showing 9 changed files with 46 additions and 43 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
## 1.1.0+3

- Improved documentation.
- Bumped year in the license.

## 1.1.0+2

- Updated package description.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021 WordCollector
Copyright (c) 2022 WordCollector

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
16 changes: 8 additions & 8 deletions example/example.dart
@@ -1,32 +1,32 @@
import 'package:robots_txt/robots_txt.dart';

Future main() async {
// Create an instance of the `robots.txt` parser
// Create an instance of the `robots.txt` parser.
final robots = Robots(host: 'https://github.com/');
// Read the ruleset of the website
// Read the ruleset of the website.
await robots.read();
// Print the ruleset
// Print the ruleset.
for (final ruleset in robots.rulesets) {
// Print the user-agent the ruleset applies to
// Print the user-agent the ruleset applies to.
print(ruleset.appliesTo);
if (ruleset.allows.isNotEmpty) {
print('Allows:');
}
// Print the path expressions allowed by this ruleset
// Print the path expressions allowed by this ruleset.
for (final rule in ruleset.allows) {
print(' - ${rule.expression}');
}
if (ruleset.disallows.isNotEmpty) {
print('Disallows:');
}
// Print the path expressions disallowed by this ruleset
// Print the path expressions disallowed by this ruleset.
for (final rule in ruleset.disallows) {
print(' - ${rule.expression}');
}
}
// False, it cannot
// False: it cannot.
print(robots.canVisitPath('/gist/', userAgent: '*'));
// True, it can
// True: it can.
print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*'));
return;
}
2 changes: 1 addition & 1 deletion lib/robots_txt.dart
@@ -1,5 +1,5 @@
/// Lightweight, sturdy and fully documented library for parsing the
/// `robots.txt` file. Nothing more, nothing less.
/// `robots.txt` file. Nothing more, nothing less.
library robots_txt;

export 'src/parser.dart';
28 changes: 14 additions & 14 deletions lib/src/parser.dart
@@ -4,27 +4,27 @@ import 'package:sprint/sprint.dart';
import 'package:web_scraper/web_scraper.dart';

/// Abstracts away the rather convoluted declaration for an element with two
/// fields; 'title' and 'attributes'. 'attributes' is a map containing the
/// attributes of the element
/// fields; 'title' and 'attributes'. 'attributes' is a map containing the
/// attributes of the element.
typedef Element = Map<String, Map<String, dynamic>>;

/// Allows for parsing of a host's `robots.txt` to get information about which
/// of its resources may or may not be accessed, as well as which of its pages
/// cannot be traversed
/// cannot be traversed.
class Robots {
/// Instance of `Sprint` message printer for the `robots.txt` parser
/// Instance of `Sprint` message logger for the `robots.txt` parser.
final Sprint log;

/// The host of this `robots.txt` file
/// The host of this `robots.txt` file.
final String host;

/// Stores an instance of the scraper for a given URL
/// Stores an instance of the scraper for a given URL.
final WebScraper scraper;

/// Stores expressions for both paths which may or may not be traversed
/// Stores expressions for both paths which may or may not be traversed.
final List<Ruleset> rulesets = [];

/// Creates an instance of a `robots.txt` parser for the [host]
/// Creates an instance of a `robots.txt` parser for the provided [host].
Robots({
required this.host,
bool quietMode = false,
@@ -33,7 +33,7 @@ class Robots {
log = Sprint('Robots',
quietMode: quietMode, productionMode: productionMode);

/// Reads and parses the `robots.txt` file of the host
/// Reads and parses the `robots.txt` file of the [host].
Future read({String? onlyRelevantTo}) async {
await scraper.loadWebPage('/robots.txt');
final body = scraper.getElement('body', [])[0];
@@ -51,7 +51,7 @@
}

/// Iterates over [lines] and parses each ruleset, additionally ignoring
/// those rulesets which are not relevant to [onlyRelevantTo]
/// those rulesets which are not relevant to [onlyRelevantTo].
void parseRulesets(Iterable<String> lines, {String? onlyRelevantTo}) {
Ruleset? ruleset;
for (var index = 0; index < lines.length; index++) {
@@ -90,9 +90,9 @@
}

/// Reads a path declaration from within `robots.txt` and converts it to a
/// regular expression for later matching
/// regular expression for later matching.
RegExp convertFieldPathToExpression(String pathDeclaration) {
// Collapse duplicate slashes and wildcards into singles
// Collapse duplicate slashes and wildcards into singles.
final collapsed =
pathDeclaration.replaceAll('/+', '/').replaceAll('*+', '*');
final normalised = collapsed.endsWith('*')
@@ -106,15 +106,15 @@
return RegExp(withTrailingText, caseSensitive: false, dotAll: true);
}

/// Extracts the key and value from [target] and puts it into a `MapEntry`
/// Extracts the key and value from [target] and puts it into a `MapEntry`.
MapEntry<String, String> getRobotsFieldFromLine(String target) {
final keyValuePair = target.split(':');
final key = keyValuePair[0].toLowerCase();
final value = keyValuePair.sublist(1).join(':').trim();
return MapEntry(key, value);
}

/// Determines whether or not [path] may be traversed
/// Determines whether or not [path] may be traversed.
bool canVisitPath(String path, {required String userAgent}) {
final explicitAllowance = rulesets.getRule(
appliesTo: userAgent, concernsPath: path, andAllowsIt: true);
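A minimal usage sketch for the parser above, following the `read({String? onlyRelevantTo})` and `canVisitPath` signatures shown in the hunks; the host `https://example.com/`, the path `/private/` and the user-agent `ExampleBot` are hypothetical:

import 'package:robots_txt/robots_txt.dart';

Future<void> main() async {
  // Hypothetical host, used purely for illustration.
  final robots = Robots(host: 'https://example.com/');
  // Parse only the rulesets relevant to 'ExampleBot'.
  await robots.read(onlyRelevantTo: 'ExampleBot');
  // Check whether 'ExampleBot' may traverse a given path.
  print(robots.canVisitPath('/private/', userAgent: 'ExampleBot'));
}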
12 changes: 6 additions & 6 deletions lib/src/rule.dart
@@ -1,24 +1,24 @@
/// A single rule (either `Allow` or `Disallow`) inside the `robots.txt` file
/// A single rule (either `Allow` or `Disallow`) inside the `robots.txt` file.
class Rule {
/// An expression which a path may be matched against to determine whether
/// this rule applies to the path
/// this rule applies to the path.
final RegExp expression;

/// The priority of this rule based on its position inside the `robots.txt`
/// file. If the path is determined to be relevant to two rules, the rule
/// file. If the path is determined to be relevant to two rules, the rule
/// with the higher priority *overrides* the ruling of the other.
final int priority;

/// Instantiates a rule with an [expression] and the [priority] it has over
/// other rules
/// other rules.
const Rule(this.expression, this.priority);
}

/// Extends `List<Rule>` with a method for getting the `Rule` with the highest
/// [Rule.priority]
/// [Rule.priority].
extension RulingOnPath on List<Rule> {
/// Taking [path], checks which `Rule`s' expressions match [path], and
/// returns the `Rule` with the highest priority
/// returns the `Rule` with the highest priority.
Rule? getRulingOnPath(String path) {
final relevantRules = where((rule) => rule.expression.hasMatch(path));
if (relevantRules.isEmpty) {
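A minimal sketch of how [Rule.priority] resolves overlapping rules, as described above; the expressions and priorities are made up for illustration:

import 'package:robots_txt/src/rule.dart';

void main() {
  // Both expressions match '/public/data', but the second rule carries the
  // higher priority, so its ruling overrides the first.
  final rules = [
    Rule(RegExp('/public/'), 0),
    Rule(RegExp('/public/data'), 1),
  ];
  final ruling = rules.getRulingOnPath('/public/data');
  print(ruling?.priority); // 1
}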
16 changes: 8 additions & 8 deletions lib/src/ruleset.dart
@@ -1,29 +1,29 @@
import 'package:robots_txt/src/rule.dart';

/// A collection of `Rule`s, and the `user-agent` they are relevant to inside
/// the `robots.txt` file
/// the `robots.txt` file.
class Ruleset {
/// The `user-agent` which this ruleset applies to
/// The `user-agent` which this ruleset applies to.
final String appliesTo;

/// List of `Rule`s which explicitly state that a path may be traversed
/// List of `Rule`s which explicitly state that a path may be traversed.
final List<Rule> allows = [];

/// List of `Rule`s which explicitly state that a path may not be traversed
/// List of `Rule`s which explicitly state that a path may not be traversed.
final List<Rule> disallows = [];

/// Instantiates a ruleset with the `user-agent`
/// Instantiates a ruleset with the `user-agent`.
Ruleset(this.appliesTo);

/// Checks whether this ruleset applies to [userAgent]
/// Checks whether this ruleset applies to [userAgent].
bool doesConcern(String userAgent) =>
appliesTo == '*' || appliesTo == userAgent;
}

/// Extends `List<Ruleset>` with a method for getting a single `Rule` from the
/// list of `Ruleset`s
/// list of `Rulesets`
extension RulingOfRulesets on List<Ruleset> {
/// Gets the rule which [appliesTo], [concernsPath] [andAllowsIt]
/// Gets the rule which [appliesTo], [concernsPath] [andAllowsIt].
Rule? getRule({
required String appliesTo,
required String concernsPath,
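A short sketch of `doesConcern`, following the wildcard check shown above; 'ExampleBot' and 'AnotherBot' are hypothetical user-agents:

import 'package:robots_txt/src/ruleset.dart';

void main() {
  // A wildcard ruleset concerns every user-agent; a named one only itself.
  final wildcard = Ruleset('*');
  final specific = Ruleset('ExampleBot');
  print(wildcard.doesConcern('AnotherBot')); // true
  print(specific.doesConcern('AnotherBot')); // false
}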
2 changes: 1 addition & 1 deletion lib/src/utils.dart
@@ -1,3 +1,3 @@
/// Taking the singular form of [word], morph it according to [count]
/// Taking the singular form of [word], morphs it according to [count].
String pluralise(String word, int count) => '${count == 0 ? 'no' : count} '
'${count == 0 || count > 1 ? '${word}s' : word}';
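For reference, the behaviour of `pluralise` as defined above, assuming it is imported directly from `lib/src/utils.dart`:

import 'package:robots_txt/src/utils.dart';

void main() {
  print(pluralise('rule', 0)); // no rules
  print(pluralise('rule', 1)); // 1 rule
  print(pluralise('rule', 3)); // 3 rules
}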
6 changes: 2 additions & 4 deletions pubspec.yaml
@@ -1,11 +1,9 @@
publish_to: https://pub.dev

name: robots_txt
version: 1.1.0+2
version: 1.1.0+3

description: >-
A simple yet complete, lightweight and sturdy `robots.txt` ruleset parser
to ensure your application follows the standard protocol.
to ensure your application follows the standard.
homepage: https://github.com/wordcollector/robots_txt
repository: https://github.com/wordcollector/robots_txt
