From ba72c981763f13fba63b2d46b4dca2e83dc815cf Mon Sep 17 00:00:00 2001 From: Peter Feichtinger Date: Fri, 21 Sep 2018 16:36:37 +0200 Subject: [PATCH 1/5] Preprocess: improve cssless processing speed This greatly improves the speed of cssless processing by using regular expressions instead of a full-blown CSS parser to manipulate the `style` attribute where possible. The CSS used on the site is simple enough so this works just fine. The tests needed some massaging to get them to pass again, because the new handling caused the following minor changes to the output. * Floating point values are always formatted with the decimal point, which was omitted previously when the fractional part was zero. * Existing CSS declarations are not touched when other declarations are added or removed, so there are some differences where spaces were added previously but aren't now. --- commands/preprocess_cssless.py | 67 ++- .../basic_string_expected.html | 470 +++++++++--------- .../multiset_expected.html | 254 +++++----- tests/test_preprocess_cssless.py | 38 +- 4 files changed, 425 insertions(+), 404 deletions(-) diff --git a/commands/preprocess_cssless.py b/commands/preprocess_cssless.py index b5f0900..7162c83 100644 --- a/commands/preprocess_cssless.py +++ b/commands/preprocess_cssless.py @@ -22,6 +22,7 @@ from io import StringIO from lxml.etree import strip_elements import logging +import re import os import warnings import io @@ -82,36 +83,58 @@ def needs_td_wrapper(element): if len(element.getchildren()) == 0: return True for el in element.getchildren(): - if has_css_property_value(el, 'display', 'table-row') or \ - has_css_property_value(el, 'display', 'table-cell'): + if get_css_property_value(el, 'display') in ('table-row', 'table-cell'): return False return True -def remove_css_property(element, property_name): - atrib = cssutils.parseStyle(element.get('style')) - atrib.removeProperty(property_name) - element.set('style', atrib.getCssText(separator='')) - if 
len(element.get('style')) == 0: - element.attrib.pop('style') +def remove_css_property(el, prop_name): + if el.get('style') is None: + return + decls = re.split(r'\s*;\s*', el.get('style')) + if decls[-1] == '': + decls.pop() + + idx = next((i for i,v in enumerate(decls) if v.startswith(prop_name + ':')), None) + if idx is not None: + del decls[idx] + if len(decls) == 0: + el.attrib.pop('style') + else: + el.set('style', ';'.join(decls)) def get_css_property_value(el, prop_name): - atrib = cssutils.parseStyle(el.get('style')) - value = atrib.getPropertyCSSValue(prop_name) - if value: - return value.cssText + if el.get('style') is None: + return None + + for decl in re.split(r'\s*;\s*', el.get('style')): + if decl.startswith(prop_name + ':'): + return decl[len(prop_name)+1:].strip() return None def has_css_property_value(el, prop_name, prop_value): - value = get_css_property_value(el, prop_name) - if value and value == prop_value: - return True - return False + if el.get('style') is None: + return False + + regex = r'(^|;)\s*{}:\s*{}(;|$)'.format(re.escape(prop_name), re.escape(prop_value)) + return re.search(regex, el.get('style')) is not None def set_css_property_value(el, prop_name, prop_value): - atrib = cssutils.parseStyle(el.get('style')) - atrib.setProperty(prop_name, prop_value) - el.set('style', atrib.getCssText(separator='')) + decl = '{}: {}'.format(prop_name, prop_value) + style = el.get('style') + if style is None or style == '': + el.set('style', decl) + else: + decls = re.split(r'\s*;\s*', style) + if decls[-1] == '': + decls.pop() + + try: + idx = next(i for i,v in enumerate(decls) if v.startswith(prop_name + ':')) + decls[idx] = decl + except StopIteration: + decls.append(decl) + el.set('style', ';'.join(decls)) def convert_display_property_to_html_tag(element, element_tag, display_value): str_attrib_value = element.get('style') @@ -170,8 +193,7 @@ def convert_span_tables_to_tr_td(root_el): def convert_inline_block_elements_to_table(root_el): for el 
in root_el.xpath('//*[contains(@style, "display")]'): - if not has_css_property_value(el, 'display', 'inline-block') and \ - not has_css_property_value(el, 'display', 'inline-table'): + if get_css_property_value(el, 'display') not in ('inline-block', 'inline-table'): continue elements_to_put_into_table = [el] @@ -179,8 +201,7 @@ def convert_inline_block_elements_to_table(root_el): # find subsequent inline block elements while el is not None: - if has_css_property_value(el, 'display', 'inline-block') or \ - has_css_property_value(el, 'display', 'inline-table'): + if get_css_property_value(el, 'display') in ('inline-block', 'inline-table'): elements_to_put_into_table.append(el) else: break diff --git a/tests/preprocess_cssless_data/basic_string_expected.html b/tests/preprocess_cssless_data/basic_string_expected.html index ed85b57..5ea0a34 100644 --- a/tests/preprocess_cssless_data/basic_string_expected.html +++ b/tests/preprocess_cssless_data/basic_string_expected.html @@ -1,5 +1,5 @@ - + std::basic_string - cppreference.com @@ -19,7 +19,7 @@ } - + @@ -27,8 +27,8 @@
-

-std::basic_string

+

+std::basic_string

@@ -39,25 +39,25 @@

-
 
- -
 
- -
 
-
std::basic_string
-
 
+
 
+ +
 
+ +
 
+
std::basic_string
+
 
- - - + + - - + - - + - - - + + +
Defined in header <string> +
Defined in header <string>
template<
+
template<

    class CharT,
    class Traits = std::char_traits<CharT>,
    class Allocator = std::allocator<CharT>
@@ -65,11 +65,11 @@

> class basic_string;

(1)
namespace pmr {
+
namespace pmr {

    template <class CharT, class Traits = std::char_traits<CharT>>
    using basic_string = std::basic_string< CharT, Traits,
                                            std::polymorphic_allocator<CharT>>
@@ -77,13 +77,13 @@

}

(2) (since C++17) (since C++17)

The class template basic_string stores and manipulates sequences of char-like objects, which are non-array objects of trivial type. The class is dependent neither on the character type nor on the nature of operations on that type. The definitions of the operations are supplied via the Traits template parameter - a specialization of std::char_traits or a compatible traits class. Traits::char_type and CharT must name the same type; otherwise the behavior is undefined. @@ -94,97 +94,97 @@

The elements of a basic_string are stored contiguously, that is, for a basic_string s, &*(s.begin() + n) == &*s.begin() + n for any n in [0, s.size()), or, equivalently, a pointer to s[0] can be passed to functions that expect a pointer to the first element of a CharT[] array.

-(since C++11) +(since C++11) -

std::basic_string satisfies the requirements of AllocatorAwareContainer, SequenceContainer and ContiguousContainer (since C++17) +

std::basic_string satisfies the requirements of AllocatorAwareContainer, SequenceContainer and ContiguousContainer (since C++17)

Several typedefs for common character types are provided:

- - - - - - - - - - - - - - - - - - - @@ -223,160 +223,160 @@

Member types

- - - - - - - - + - -
Defined in header <string>
+
Defined in header <string>
Type + Type Definition + Definition
std::string + std::string std::basic_string<char> + std::basic_string<char>
std::wstring + std::wstring std::basic_string<wchar_t> + std::basic_string<wchar_t>
std::u16string (C++11) + std::u16string (C++11) std::basic_string<char16_t> + std::basic_string<char16_t>
std::u32string (C++11) + std::u32string (C++11) std::basic_string<char32_t> + std::basic_string<char32_t>
std::pmr::string (C++17) + std::pmr::string (C++17) std::pmr::basic_string<char> + std::pmr::basic_string<char>
std::pmr::wstring (C++17) + std::pmr::wstring (C++17) std::pmr::basic_string<wchar_t> + std::pmr::basic_string<wchar_t>
std::pmr::u16string (C++17) + std::pmr::u16string (C++17) std::pmr::basic_string<char16_t> + std::pmr::basic_string<char16_t>
std::pmr::u32string (C++17) + std::pmr::u32string (C++17) std::pmr::basic_string<char32_t> + std::pmr::basic_string<char32_t>
Member type + Member type Definition + Definition
traits_type + traits_type Traits + Traits
value_type + value_type CharT + CharT
allocator_type + allocator_type Allocator Allocator
size_type + size_type + - -
- + - +
Allocator::size_type(until C++11)(until C++11)
std::allocator_traits<Allocator>::size_type(since C++11)(since C++11)
difference_type + difference_type + - -
- + - +
Allocator::difference_type(until C++11)(until C++11)
std::allocator_traits<Allocator>::difference_type(since C++11)(since C++11)
reference + reference + - -
- + - +
Allocator::reference(until C++11)(until C++11)
value_type&(since C++11)(since C++11)
const_reference + const_reference + - -
- + - +
Allocator::const_reference(until C++11)(until C++11)
const value_type&(since C++11)(since C++11)
pointer + pointer + - - - - + - - - - + - - + - - + @@ -428,60 +428,60 @@

Member func

- - + - - + -
- + - +
Allocator::pointer(until C++11)(until C++11)
std::allocator_traits<Allocator>::pointer(since C++11)(since C++11)
const_pointer + const_pointer + - - + - - + - - + - - +
- + - +
Allocator::const_pointer(until C++11)(until C++11)
std::allocator_traits<Allocator>::const_pointer(since C++11)(since C++11)
iterator + iterator RandomAccessIterator RandomAccessIterator
const_iterator + const_iterator Constant RandomAccessIterator Constant RandomAccessIterator
reverse_iterator + reverse_iterator std::reverse_iterator<iterator> std::reverse_iterator<iterator>
const_reverse_iterator + const_reverse_iterator std::reverse_iterator<const_iterator> std::reverse_iterator<const_iterator>

Member functions

@@ -384,39 +384,39 @@

Member func

+ constructs a basic_string
(public member function)
constructs a basic_string
(public member function)
(destructor)
+
(destructor)
destroys the string, deallocating internal storage if used
(public member function) +
destroys the string, deallocating internal storage if used
(public member function)
+ assigns values to the string
(public member function)
assigns values to the string
(public member function)
+ assign characters to a string
(public member function)
assign characters to a string
(public member function)
+ returns the associated allocator
(public member function)
returns the associated allocator
(public member function)
+ accesses the specified character with bounds checking
(public member function)
accesses the specified character with bounds checking
(public member function)
+ accesses the specified character
(public member function)
accesses the specified character
(public member function)
+ - + -
- +
(C++11)
(C++11)
accesses the first character
(public member function)
accesses the first character
(public member function)
+ - + - - + - - + -
- +
(C++11)
(C++11)
accesses the last character
(public member function)
accesses the last character
(public member function)
+ returns a pointer to the first character of a string
(public member function)
returns a pointer to the first character of a string
(public member function)
+ returns a non-modifiable standard C character array version of the string
(public member function)
returns a non-modifiable standard C character array version of the string
(public member function)
+ - + @@ -493,66 +493,66 @@

Member func

-
- +
(C++17)
(C++17)
returns a non-modifiable string_view into the entire string
(public member function)
returns a non-modifiable string_view into the entire string
(public member function)
+ - + -
- +
(C++11)
(C++11)
returns an iterator to the beginning
(public member function)
returns an iterator to the beginning
(public member function)
+ - + -
- +
(C++11)
(C++11)
returns an iterator to the end
(public member function)
returns an iterator to the end
(public member function)
+ - + -
- +
(C++11)
(C++11)
returns a reverse iterator to the beginning
(public member function)
returns a reverse iterator to the beginning
(public member function)
+ - + @@ -564,50 +564,50 @@

Member func

- - + -
- +
(C++11)
(C++11)
returns a reverse iterator to the end
(public member function)
returns a reverse iterator to the end
(public member function)
+ checks whether the string is empty
(public member function)
checks whether the string is empty
(public member function)
+ - + - - + - - + - - + -
returns the number of characters
(public member function)
returns the number of characters
(public member function)
+ returns the maximum number of characters
(public member function)
returns the maximum number of characters
(public member function)
+ reserves storage
(public member function)
reserves storage
(public member function)
+ returns the number of characters that can be held in currently allocated storage
(public member function)
returns the number of characters that can be held in currently allocated storage
(public member function)
+ - + @@ -619,116 +619,116 @@

Member func

- - + - - + - - + - - + -
- +
(C++11)
(C++11)
reduces memory usage by freeing unused memory
(public member function)
reduces memory usage by freeing unused memory
(public member function)
+ clears the contents
(public member function)
clears the contents
(public member function)
+ inserts characters
(public member function)
inserts characters
(public member function)
+ removes characters
(public member function)
removes characters
(public member function)
+ appends a character to the end
(public member function)
appends a character to the end
(public member function)
+ - + - - + - - + - - + -
- +
(C++11)
(C++11)
removes the last character
(public member function)
removes the last character
(public member function)
+ appends characters to the end
(public member function)
appends characters to the end
(public member function)
+ appends characters to the end
(public member function)
appends characters to the end
(public member function)
+ compares two strings
(public member function)
compares two strings
(public member function)
+ - + -
- +
(C++20)
(C++20)
checks if the string starts with the given prefix
(public member function)
checks if the string starts with the given prefix
(public member function)
+ - + - - + - - + - - + - - + - - + @@ -740,44 +740,44 @@

Member func

- - + - - + - - + - - + - - + - - + @@ -789,12 +789,12 @@

Member func

- - - + -
- +
(C++20)
(C++20)
checks if the string ends with the given suffix
(public member function)
checks if the string ends with the given suffix
(public member function)
+ replaces specified portion of a string
(public member function)
replaces specified portion of a string
(public member function)
+ returns a substring
(public member function)
returns a substring
(public member function)
+ copies characters
(public member function)
copies characters
(public member function)
+ changes the number of characters stored
(public member function)
changes the number of characters stored
(public member function)
+ swaps the contents
(public member function)
swaps the contents
(public member function)
+ find characters in the string
(public member function)
find characters in the string
(public member function)
+ find the last occurrence of a substring
(public member function)
find the last occurrence of a substring
(public member function)
+ find first occurrence of characters
(public member function)
find first occurrence of characters
(public member function)
+ find first absence of characters
(public member function)
find first absence of characters
(public member function)
+ find last occurrence of characters
(public member function)
find last occurrence of characters
(public member function)
+ find last absence of characters
(public member function)
find last absence of characters
(public member function)
+ - +
- +
[static]
[static]
special value. The exact meaning depends on the context
(public static member constant)
special value. The exact meaning depends on the context
(public static member constant)

Non-member functions

@@ -802,14 +802,14 @@

Non-mem

+ concatenates two strings or a string and a char
(function template)
concatenates two strings or a string and a char
(function template)
+ - + -
lexicographically compares two strings
(function template)
lexicographically compares two strings
(function template)
- + @@ -838,19 +838,19 @@

Non-mem

-
std::swap(std::basic_string) + specializes the std::swap algorithm
(function template)
specializes the std::swap algorithm
(function template)
+ - + - - + @@ -862,92 +862,92 @@

Non-mem

-
performs stream input and output on strings
(function template)
performs stream input and output on strings
(function template)
+ read data from an I/O stream into a string
(function template)
read data from an I/O stream into a string
(function template)
+ - + -
- - - + + +
(C++11)
(C++11)
(C++11)
(C++11)
(C++11)
(C++11)
converts a string to a signed integer
(function)
converts a string to a signed integer
(function)
+ - + -
- - + +
(C++11)
(C++11)
(C++11)
(C++11)
converts a string to an unsigned integer
(function)
converts a string to an unsigned integer
(function)
+ - + -
- - - + + +
(C++11)
(C++11)
(C++11)
(C++11)
(C++11)
(C++11)
converts a string to a floating point value
(function)
converts a string to a floating point value
(function)
+ - + -
- +
(C++11)
(C++11)
converts an integral or floating point value to string
(function)
converts an integral or floating point value to string
(function)
+ - +
- +
(C++11)
(C++11)
converts an integral or floating point value to wstring
(function)
converts an integral or floating point value to wstring
(function)

Literals

- - - - - - - + - - + - - + - - + - - + - - + - - + - -
Defined in inline namespace std::literals::string_literals
+
Defined in inline namespace std::literals::string_literals
+ - +
- +
(C++14)
(C++14)
Converts a character array literal to basic_string
(function)
Converts a character array literal to basic_string
(function)

Helper classes

@@ -955,41 +955,41 @@

Helper classe

+ - +
hash support for strings
(class template specialization)
hash support for strings
(class template specialization)
-

Deduction guides(since C++17)

+

Deduction guides(since C++17)

- + @@ -27,8 +27,8 @@
-

-std::multiset

+

+std::multiset

@@ -39,25 +39,25 @@

-
 
- -
 
- -
 
-
std::multiset
-
 
+
 
+ +
 
+ +
 
+
std::multiset
+
 
- - - + + - - + - - + - - - + + +
Defined in header <set> +
Defined in header <set>
template<
+
template<

    class Key,
    class Compare = std::less<Key>,
    class Allocator = std::allocator<Key>
@@ -65,11 +65,11 @@

> class multiset;

(1)
namespace pmr {
+
namespace pmr {

    template <class Key, class Compare = std::less<Key>>
    using multiset = std::multiset<Key, Compare,
                                   std::pmr::polymorphic_allocator<Key>>;
@@ -77,20 +77,20 @@

}

(2) (since C++17) (since C++17)

std::multiset is an associative container that contains a sorted set of objects of type Key. Unlike set, multiple keys with equivalent values are allowed. Sorting is done using the key comparison function Compare. Search, insertion, and removal operations have logarithmic complexity.

Everywhere the standard library uses the Compare concept, equivalence is determined by using the equivalence relation as described on Compare. In imprecise terms, two objects a and b are considered equivalent if neither compares less than the other: !comp(a, b) && !comp(b, a).

-

The order of the elements that compare equivalent is the order of insertion and does not change. (since C++11) +

The order of the elements that compare equivalent is the order of insertion and does not change. (since C++11)

std::multiset meets the requirements of Container, AllocatorAwareContainer, AssociativeContainer and ReversibleContainer.

@@ -99,169 +99,169 @@

Member types

Member type + Member type Definition + Definition
key_type + key_type Key Key
value_type + value_type Key Key
size_type + size_type Unsigned integer type (usually std::size_t) Unsigned integer type (usually std::size_t)
difference_type + difference_type Signed integer type (usually std::ptrdiff_t) Signed integer type (usually std::ptrdiff_t)
key_compare + key_compare Compare Compare
value_compare + value_compare Compare Compare
allocator_type + allocator_type Allocator Allocator
reference + reference + - -
- + - +
Allocator::reference(until C++11)(until C++11)
value_type&(since C++11)(since C++11)
const_reference + const_reference + - -
- + - +
Allocator::const_reference(until C++11)(until C++11)
const value_type&(since C++11)(since C++11)
pointer + pointer + - -
- + - +
Allocator::pointer(until C++11)(until C++11)
std::allocator_traits<Allocator>::pointer(since C++11)(since C++11)
const_pointer + const_pointer + - - - - + - - + - - + - - + @@ -304,7 +304,7 @@

Member func

-
- + - +
Allocator::const_pointer(until C++11)(until C++11)
std::allocator_traits<Allocator>::const_pointer(since C++11)(since C++11)
iterator + iterator + - - + - - + - - + - - +
- + - +
BidirectionalIterator(until C++11)(until C++11)
Constant BidirectionalIterator (since C++11)(since C++11)
const_iterator + const_iterator Constant BidirectionalIterator Constant BidirectionalIterator
reverse_iterator + reverse_iterator std::reverse_iterator<iterator> std::reverse_iterator<iterator>
const_reverse_iterator + const_reverse_iterator std::reverse_iterator<const_iterator> std::reverse_iterator<const_iterator>
node_type(since C++17) + node_type(since C++17) a specialization of node handle representing a container node a specialization of node handle representing a container node

Member functions

@@ -269,30 +269,30 @@

Member func

+ constructs the multiset
(public member function)
constructs the multiset
(public member function)
+ destructs the multiset
(public member function)
destructs the multiset
(public member function)
+ assigns values to the container
(public member function)
assigns values to the container
(public member function)
+ returns the associated allocator
(public member function)
returns the associated allocator
(public member function)
+ - + -
returns an iterator to the beginning
(public member function)
returns an iterator to the beginning
(public member function)
+ - + -
returns an iterator to the end
(public member function)
returns an iterator to the end
(public member function)
+ - + -
returns a reverse iterator to the beginning
(public member function)
returns a reverse iterator to the beginning
(public member function)
+ - + @@ -375,23 +375,23 @@

Member func

- - + - - + - - + @@ -403,70 +403,70 @@

Member func

- - + - - + -
returns a reverse iterator to the end
(public member function)
returns a reverse iterator to the end
(public member function)
+ checks whether the container is empty
(public member function)
checks whether the container is empty
(public member function)
+ returns the number of elements
(public member function)
returns the number of elements
(public member function)
+ returns the maximum possible number of elements
(public member function)
returns the maximum possible number of elements
(public member function)
+ clears the contents
(public member function)
clears the contents
(public member function)
+ inserts elements or nodes (since C++17)
(public member function)
inserts elements or nodes (since C++17)
(public member function)
+ - + -
- +
(C++11)
(C++11)
constructs element in-place
(public member function)
constructs element in-place
(public member function)
+ - + - - + - - + -
- +
(C++11)
(C++11)
constructs elements in-place using a hint
(public member function)
constructs elements in-place using a hint
(public member function)
+ erases elements
(public member function)
erases elements
(public member function)
+ swaps the contents
(public member function)
swaps the contents
(public member function)
+ - + - -
- +
(C++17)
(C++17)
extracts nodes from the container
(public member function)
extracts nodes from the container
(public member function)
+ - + @@ -478,37 +478,37 @@

Member func

- - + - - + - - + - - + - - + @@ -520,16 +520,16 @@

Member func

- - + - - +
- +
(C++17)
(C++17)
splices nodes from another container
(public member function)
splices nodes from another container
(public member function)
+ returns the number of elements matching specific key
(public member function)
returns the number of elements matching specific key
(public member function)
+ finds element with specific key
(public member function)
finds element with specific key
(public member function)
+ returns range of elements matching a specific key
(public member function)
returns range of elements matching a specific key
(public member function)
+ returns an iterator to the first element not less than the given key
(public member function)
returns an iterator to the first element not less than the given key
(public member function)
+ returns an iterator to the first element greater than the given key
(public member function)
returns an iterator to the first element greater than the given key
(public member function)
+ returns the function that compares keys
(public member function)
returns the function that compares keys
(public member function)
+ returns the function that compares keys in objects of type value_type
(public member function)
returns the function that compares keys in objects of type value_type
(public member function)

Non-member functions

@@ -537,7 +537,7 @@

Non-mem

+ - + -
lexicographically compares the values in the multiset
(function template)
lexicographically compares the values in the multiset
(function template)
- +
std::swap(std::multiset) + specializes the std::swap algorithm
(function template)
specializes the std::swap algorithm
(function template)
-

Deduction guides(since C++17)

+

Deduction guides(since C++17)

Notes

The member types iterator and const_iterator may be aliases to the same type. Since iterator is convertible to const_iterator, const_iterator should be used in function parameter lists to avoid violations of the One Definition Rule.

diff --git a/tests/test_preprocess_cssless.py b/tests/test_preprocess_cssless.py index 707b973..3dd7ff1 100644 --- a/tests/test_preprocess_cssless.py +++ b/tests/test_preprocess_cssless.py @@ -101,10 +101,10 @@ def test_normal_table(self): expected = '''\
- +
- - + +
1212
@@ -127,7 +127,7 @@ def test_wraps_table_row_text(self): expected = '''\
- +
little text
@@ -152,7 +152,7 @@ def test_wraps_into_td_table_row_children(self): expected = '''\
- +
blabla blabla2 @@ -179,7 +179,7 @@ def test_wraps_into_td_table_row_children_with_style(self): expected = '''\
- +
@@ -208,7 +208,7 @@ def test_wraps_into_td_table_row_children_with_tags(self): expected = '''\
-
(C++11)
+
bla
@@ -237,7 +237,7 @@ def test_does_not_wrap_into_td_children_when_not_in_table_row(self): expected = '''\
- +
2 @@ -264,7 +264,7 @@ def test_does_not_wrap_children_when_sibling_is_td(self): expected = '''\
-
+
1 @@ -293,7 +293,7 @@ def test_does_not_convert_tr_when_parent_is_not_table(self): expected = '''\
-
2
+
blabla @@ -323,7 +323,7 @@ def test_does_not_convert_td_when_parent_is_not_tr(self): expected = '''\
-
+
blabla @@ -386,7 +386,7 @@ def test_fun(root): expected = '''\ - ''' @@ -458,7 +458,7 @@ def test_fun(root): ''' expected = '''\ -
+
text
''' @@ -478,8 +478,8 @@ def test_fun(root): expected = '''\ -
-
+
+
text
''' @@ -497,7 +497,7 @@ def test_fun(root): ''' expected = '''\ -
+
text
''' @@ -516,7 +516,7 @@ def test_fun(root): ''' expected = '''\ -
+
text
''' @@ -537,7 +537,7 @@ def test_fun(root): expected = '''\ -
+
text
@@ -561,7 +561,7 @@ def test_fun(root): expected = '''\ -
+
text
From 78ff38045469730990790a266f2b8eb995fee80e Mon Sep 17 00:00:00 2001 From: Peter Feichtinger Date: Sat, 22 Sep 2018 23:59:24 +0200 Subject: [PATCH 2/5] Preprocess: add premailer source code Premailer is used by the cssless preprocessing script to inline CSS styles, it hasn't been maintained in a while. This adds the source code of the premailer module so we can make performance improvements to cssless preprocessing. These are going to be changes specific to this project, which won't be of any use to upstream premailer. --- premailer/LICENSE | 25 + premailer/__init__.py | 4 + premailer/__main__.py | 156 ++ premailer/cache.py | 85 + premailer/merge_style.py | 113 + premailer/premailer.py | 678 +++++ premailer/tests/test-apple-newsletter.html | 101 + premailer/tests/test-external-links.css | 12 + premailer/tests/test-external-styles.css | 12 + premailer/tests/test-issue78.html | 21 + premailer/tests/test-unicode.html | 15 + premailer/tests/test_cache.py | 47 + premailer/tests/test_merge_style.py | 18 + premailer/tests/test_premailer.py | 2717 ++++++++++++++++++++ premailer/tests/test_utils.py | 19 + 15 files changed, 4023 insertions(+) create mode 100644 premailer/LICENSE create mode 100644 premailer/__init__.py create mode 100644 premailer/__main__.py create mode 100644 premailer/cache.py create mode 100644 premailer/merge_style.py create mode 100644 premailer/premailer.py create mode 100644 premailer/tests/test-apple-newsletter.html create mode 100644 premailer/tests/test-external-links.css create mode 100644 premailer/tests/test-external-styles.css create mode 100644 premailer/tests/test-issue78.html create mode 100644 premailer/tests/test-unicode.html create mode 100644 premailer/tests/test_cache.py create mode 100644 premailer/tests/test_merge_style.py create mode 100644 premailer/tests/test_premailer.py create mode 100644 premailer/tests/test_utils.py diff --git a/premailer/LICENSE b/premailer/LICENSE new file mode 100644 index 0000000..0929a01 --- /dev/null 
+++ b/premailer/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2009-2012, Peter Bengtsson +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Peter Bengtsson nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Peter Bengtsson OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/premailer/__init__.py b/premailer/__init__.py new file mode 100644 index 0000000..864d79f --- /dev/null +++ b/premailer/__init__.py @@ -0,0 +1,4 @@ +from __future__ import absolute_import, unicode_literals +from .premailer import Premailer, transform + +__version__ = '3.2.0' diff --git a/premailer/__main__.py b/premailer/__main__.py new file mode 100644 index 0000000..1a09145 --- /dev/null +++ b/premailer/__main__.py @@ -0,0 +1,156 @@ +from __future__ import absolute_import, unicode_literals +import sys +import argparse + +from .premailer import Premailer + + +def main(args): + """Command-line tool to transform html style to inline css + + Usage:: + + $ echo '

Title

' | \ + python -m premailer +

+ $ cat newsletter.html | python -m premailer + """ + + parser = argparse.ArgumentParser(usage='python -m premailer [options]') + + parser.add_argument( + "-f", "--file", nargs='?', type=argparse.FileType('r'), + help="Specifies the input file. The default is stdin.", + default=sys.stdin, dest="infile" + ) + + parser.add_argument( + "-o", "--output", nargs='?', type=argparse.FileType('w'), + help="Specifies the output file. The default is stdout.", + default=sys.stdout, dest="outfile" + ) + + parser.add_argument( + "--base-url", default=None, type=str, dest="base_url" + ) + + parser.add_argument( + "--remove-internal-links", default=True, + help="Remove links that start with a '#' like anchors.", + dest="preserve_internal_links" + ) + + parser.add_argument( + "--exclude-pseudoclasses", default=False, + help="Pseudo classes like p:last-child', p:first-child, etc", + action="store_true", dest="exclude_pseudoclasses" + ) + + parser.add_argument( + "--preserve-style-tags", default=False, + help="Do not delete tags from the html document.", + action="store_true", dest="keep_style_tags" + ) + + parser.add_argument( + "--remove-star-selectors", default=True, + help="All wildcard selectors like '* {color: black}' will be removed.", + action="store_false", dest="include_star_selectors" + ) + + parser.add_argument( + "--remove-classes", default=False, + help="Remove all class attributes from all elements", + action="store_true", dest="remove_classes" + ) + + parser.add_argument( + "--capitalize-float-margin", default=False, + help="Capitalize float and margin properties for outlook.com compat.", + action="store_true", dest="capitalize_float_margin" + ) + + parser.add_argument( + "--strip-important", default=False, + help="Remove '!important' for all css declarations.", + action="store_true", dest="strip_important" + ) + + parser.add_argument( + "--method", default="html", dest="method", + help="The type of html to output. 'html' for HTML, 'xml' for XHTML." 
+ ) + + parser.add_argument( + "--base-path", default=None, dest="base_path", + help="The base path for all external stylsheets." + ) + + parser.add_argument( + "--external-style", action="append", dest="external_styles", + help="The path to an external stylesheet to be loaded." + ) + + parser.add_argument( + "--css-text", action="append", dest="css_text", + help="CSS text to be applied to the html." + ) + + parser.add_argument( + "--disable-basic-attributes", dest="disable_basic_attributes", + help="Disable provided basic attributes (comma separated)", default=[] + ) + + parser.add_argument( + "--disable-validation", default=False, + action="store_true", dest="disable_validation", + help="Disable CSSParser validation of attributes and values", + ) + + parser.add_argument( + "--pretty", default=False, + action="store_true", + help="Pretty-print the outputted HTML.", + ) + + parser.add_argument( + "--encoding", default='utf-8', + help="Output encoding. The default is utf-8", + ) + + options = parser.parse_args(args) + + if options.disable_basic_attributes: + options.disable_basic_attributes = ( + options.disable_basic_attributes.split() + ) + + html = options.infile.read() + if hasattr(html, 'decode'): # Forgive me: Python 2 compatability + html = html.decode('utf-8') + + p = Premailer( + html=html, + base_url=options.base_url, + preserve_internal_links=options.preserve_internal_links, + exclude_pseudoclasses=options.exclude_pseudoclasses, + keep_style_tags=options.keep_style_tags, + include_star_selectors=options.include_star_selectors, + remove_classes=options.remove_classes, + strip_important=options.strip_important, + external_styles=options.external_styles, + css_text=options.css_text, + method=options.method, + base_path=options.base_path, + disable_basic_attributes=options.disable_basic_attributes, + disable_validation=options.disable_validation + ) + options.outfile.write(p.transform( + encoding=options.encoding, + pretty_print=options.pretty + )) + return 0 
+ + +if __name__ == '__main__': # pragma: no cover + sys.exit(main(sys.argv[1:])) diff --git a/premailer/cache.py b/premailer/cache.py new file mode 100644 index 0000000..242dc8b --- /dev/null +++ b/premailer/cache.py @@ -0,0 +1,85 @@ +import functools + + +class _HashedSeq(list): + # # From CPython + __slots__ = 'hashvalue' + + def __init__(self, tup, hash=hash): + self[:] = tup + self.hashvalue = hash(tup) + + def __hash__(self): + return self.hashvalue + + +# if we only have nonlocal +class _Cache(object): + def __init__(self): + self.off = False + self.missed = 0 + self.cache = {} + + +def function_cache(expected_max_entries=1000): + """ + function_cache is a decorator for caching function call + the argument to the wrapped function must be hashable else + it will not work + + expected_max_entries is for protecting cache failure. If cache + misses more than this number the cache will turn off itself. + Specify None you sure that the cache will not cause memory + limit problem. + + Args: + expected_max_entries(integer OR None): will raise if not correct + + Returns: + function + + """ + if ( + expected_max_entries is not None and + not isinstance(expected_max_entries, int) + ): + raise TypeError( + 'Expected expected_max_entries to be an integer or None' + ) + + # indicator of cache missed + sentinel = object() + + def decorator(func): + cached = _Cache() + + @functools.wraps(func) + def inner(*args, **kwargs): + if cached.off: + return func(*args, **kwargs) + + keys = args + if kwargs: + sorted_items = sorted(kwargs.items()) + for item in sorted_items: + keys += item + + hashed = hash(_HashedSeq(keys)) + result = cached.cache.get(hashed, sentinel) + if result is sentinel: + cached.missed += 1 + result = func(*args, **kwargs) + cached.cache[hashed] = result + # # something is wrong if we are here more than expected + # # empty and turn it off + if ( + expected_max_entries is not None and + cached.missed > expected_max_entries + ): + cached.off = True + 
cached.cache.clear() + + return result + + return inner + return decorator diff --git a/premailer/merge_style.py b/premailer/merge_style.py new file mode 100644 index 0000000..f9fc2b3 --- /dev/null +++ b/premailer/merge_style.py @@ -0,0 +1,113 @@ +import cssutils +import threading +from operator import itemgetter +try: + from collections import OrderedDict +except ImportError: # pragma: no cover + # some old python 2.6 thing then, eh? + from ordereddict import OrderedDict + + +def format_value(prop): + if prop.priority == "important": + return prop.propertyValue.cssText.strip() + ' !important' + else: + return prop.propertyValue.cssText.strip() + + +def csstext_to_pairs(csstext): + """ + csstext_to_pairs takes css text and make it to list of + tuple of key,value. + """ + # The lock is required to avoid ``cssutils`` concurrency + # issues documented in issue #65 + with csstext_to_pairs._lock: + return sorted( + [ + (prop.name.strip(), format_value(prop)) + for prop in cssutils.parseStyle(csstext) + ], + key=itemgetter(0) + ) + + +csstext_to_pairs._lock = threading.RLock() + + +def merge_styles( + inline_style, + new_styles, + classes, + remove_unset_properties=False +): + """ + This will merge all new styles where the order is important + The last one will override the first + When that is done it will apply old inline style again + The old inline style is always important and override + all new ones. The inline style must be valid. + + Args: + inline_style(str): the old inline style of the element if there + is one + new_styles: a list of new styles, each element should be + a list of tuple + classes: a list of classes which maps new_styles, important! 
+ remove_unset_properties(bool): Allow us to remove certain CSS + properties with rules that set their value to 'unset' + + Returns: + str: the final style + """ + # building classes + styles = OrderedDict([('', OrderedDict())]) + for pc in set(classes): + styles[pc] = OrderedDict() + + for i, style in enumerate(new_styles): + for k, v in style: + styles[classes[i]][k] = v + + # keep always the old inline style + if inline_style: + # inline should be a declaration list as I understand + # ie property-name:property-value;... + for k, v in csstext_to_pairs(inline_style): + styles[''][k] = v + + normal_styles = [] + pseudo_styles = [] + for pseudoclass, kv in styles.items(): + if remove_unset_properties: + # Remove rules that we were going to have value 'unset' because + # they effectively are the same as not saying anything about the + # property when inlined + kv = OrderedDict( + (k, v) for (k, v) in kv.items() if not v.lower() == 'unset' + ) + if not kv: + continue + if pseudoclass: + pseudo_styles.append( + '%s{%s}' % ( + pseudoclass, + '; '.join('%s:%s' % (k, v) for k, v in kv.items()) + ) + ) + else: + normal_styles.append('; '.join( + '%s:%s' % (k, v) for k, v in kv.items() + )) + + if pseudo_styles: + # if we do or code thing correct this should not happen + # inline style definition: declarations without braces + all_styles = ( + (['{%s}' % ''.join(normal_styles)] + pseudo_styles) + if normal_styles else pseudo_styles + ) + else: + all_styles = normal_styles + + return ' '.join(all_styles).strip() diff --git a/premailer/premailer.py b/premailer/premailer.py new file mode 100644 index 0000000..fb2bc08 --- /dev/null +++ b/premailer/premailer.py @@ -0,0 +1,678 @@ +from __future__ import absolute_import, unicode_literals, print_function +import codecs +import operator +import os +import re +import warnings +try: + from collections import OrderedDict +except ImportError: # pragma: no cover + # some old python 2.6 thing then, eh? 
+ from ordereddict import OrderedDict +import sys +if sys.version_info >= (3,): # pragma: no cover + # As in, Python 3 + from io import StringIO + from urllib.parse import urljoin, urlparse + STR_TYPE = str +else: # Python 2 + try: + from cStringIO import StringIO + except ImportError: # pragma: no cover + from StringIO import StringIO + StringIO = StringIO # shut up pyflakes + from urlparse import urljoin, urlparse + STR_TYPE = basestring # NOQA + +import cssutils +import requests +from lxml import etree +from lxml.cssselect import CSSSelector +from premailer.merge_style import merge_styles, csstext_to_pairs +from premailer.cache import function_cache + +__all__ = ['PremailerError', 'Premailer', 'transform'] + + +class PremailerError(Exception): + pass + + +class ExternalNotFoundError(ValueError): + pass + + +def make_important(bulk): + """makes every property in a string !important. + """ + return ';'.join('%s !important' % p if not p.endswith('!important') else p + for p in bulk.split(';')) + + +def get_or_create_head(root): + """Ensures that `root` contains a element and returns it. + """ + head = CSSSelector('head')(root) + if not head: + head = etree.Element('head') + body = CSSSelector('body')(root)[0] + body.getparent().insert(0, head) + return head + else: + return head[0] + + +@function_cache() +def _cache_parse_css_string(css_body, validate=True): + """ + This function will cache the result from cssutils + It is a big gain when number of rules is big + Maximum cache entries are 1000. This is mainly for + protecting memory leak in case something gone wild. 
+ Be aware that you can turn the cache off in Premailer + + Args: + css_body(str): css rules in string format + validate(bool): if cssutils should validate + + Returns: + cssutils.css.cssstylesheet.CSSStyleSheet + + """ + return cssutils.parseString(css_body, validate=validate) + + +def capitalize_float_margin(css_body): + """Capitalize float and margin CSS property names + """ + def _capitalize_property(match): + return '{0}:{1}{2}'.format( + match.group('property').capitalize(), + match.group('value'), + match.group('terminator')) + + return _lowercase_margin_float_rule.sub(_capitalize_property, css_body) + + +_element_selector_regex = re.compile(r'(^|\s)\w') +_cdata_regex = re.compile(r'\<\!\[CDATA\[(.*?)\]\]\>', re.DOTALL) +_lowercase_margin_float_rule = re.compile( + r'''(?Pmargin(-(top|bottom|left|right))?|float) + : + (?P.*?) + (?P$|;)''', + re.IGNORECASE | re.VERBOSE) +_importants = re.compile('\s*!important') +#: The short (3-digit) color codes that cause issues for IBM Notes +_short_color_codes = re.compile(r'^#([0-9a-f])([0-9a-f])([0-9a-f])$', re.I) + +# These selectors don't apply to all elements. Rather, they specify +# which elements to apply to. 
+FILTER_PSEUDOSELECTORS = [':last-child', ':first-child', 'nth-child'] + + +class Premailer(object): + + attribute_name = 'data-premailer' + + def __init__(self, html, base_url=None, + disable_link_rewrites=False, + preserve_internal_links=False, + preserve_inline_attachments=True, + exclude_pseudoclasses=True, + keep_style_tags=False, + include_star_selectors=False, + remove_classes=False, + capitalize_float_margin=False, + strip_important=True, + external_styles=None, + css_text=None, + method="html", + base_path=None, + disable_basic_attributes=None, + disable_validation=False, + cache_css_parsing=True, + cssutils_logging_handler=None, + cssutils_logging_level=None, + disable_leftover_css=False, + align_floating_images=True, + remove_unset_properties=True): + self.html = html + self.base_url = base_url + + # If base_url is specified, it is used for loading external stylesheets + # via relative URLs. + # + # Also, if base_url is specified, premailer will transform all URLs by + # joining them with the base_url. Setting preserve_internal_links to + # True will disable this behavior for links to named anchors. Setting + # preserve_inline_attachments to True will disable this behavior for + # any links with cid: scheme. Setting disable_link_rewrites to True + # will disable this behavior altogether. + self.disable_link_rewrites = disable_link_rewrites + self.preserve_internal_links = preserve_internal_links + self.preserve_inline_attachments = preserve_inline_attachments + self.exclude_pseudoclasses = exclude_pseudoclasses + # whether to delete the + + +

Hi!

+

Yes!

+ + + """ + p = Premailer(html) + print(p.transform()) diff --git a/premailer/tests/test-apple-newsletter.html b/premailer/tests/test-apple-newsletter.html new file mode 100644 index 0000000..13d5449 --- /dev/null +++ b/premailer/tests/test-apple-newsletter.html @@ -0,0 +1,101 @@ + + + + Newsletter + + + + + + +
+
+ + + +
+ + + +
+ + + + + +
+ + + + +
+
Thanks for making a reservation.
+
+
+
+ + + +
+
+
+ + + + + +
+ + +
Dear peter,
+
You are scheduled for a Genius Bar appointment.
+
Topic: iPhone
+
Date: Wednesday, Aug 26, 2009
+
Time: 11:10AM
+
Location: Apple Store, Regent Street
+
+
Apple Store,
+
Regent Street
+ +
If you are no longer able to attend this session, please cancel or reschedule your reservation.
+ +
+ + + + +
+
+
We look forward to seeing you.
+
Your Apple Store team,
+
Regent Street
+
+ + + +
+ + + +
+
TM and copyright © 2008 Apple Inc. 1 Infinite Loop, MS 303-3DM, Cupertino, CA 95014.
+ +
+ + diff --git a/premailer/tests/test-external-links.css b/premailer/tests/test-external-links.css new file mode 100644 index 0000000..0f98320 --- /dev/null +++ b/premailer/tests/test-external-links.css @@ -0,0 +1,12 @@ +h1 { + color: blue; +} +h2 { + color: green; +} +a { + color: pink; +} +a:hover { + color: purple; +} diff --git a/premailer/tests/test-external-styles.css b/premailer/tests/test-external-styles.css new file mode 100644 index 0000000..6087e9f --- /dev/null +++ b/premailer/tests/test-external-styles.css @@ -0,0 +1,12 @@ +h1 { + color: brown; +} +h2::after { + content: ""; + display: block; +} +@media all and (max-width: 320px) { + h1 { + font-size: 12px; + } +} diff --git a/premailer/tests/test-issue78.html b/premailer/tests/test-issue78.html new file mode 100644 index 0000000..e664107 --- /dev/null +++ b/premailer/tests/test-issue78.html @@ -0,0 +1,21 @@ + + + + + + + +

h1

+

html

+ + diff --git a/premailer/tests/test-unicode.html b/premailer/tests/test-unicode.html new file mode 100644 index 0000000..f82e2f8 --- /dev/null +++ b/premailer/tests/test-unicode.html @@ -0,0 +1,15 @@ + + + + + Unicode Test + + + +

問題

+ + diff --git a/premailer/tests/test_cache.py b/premailer/tests/test_cache.py new file mode 100644 index 0000000..4f9d397 --- /dev/null +++ b/premailer/tests/test_cache.py @@ -0,0 +1,47 @@ +from __future__ import absolute_import, unicode_literals +import unittest + +from premailer.cache import function_cache +from nose.tools import raises + + +class TestCache(unittest.TestCase): + + @raises(TypeError) + def test_expected_max_entries_raise(self): + function_cache(expected_max_entries='testing') + + def test_auto_turn_off(self): + test = {'call_count': 0} + + def test_func(*args, **kwargs): + test['call_count'] += 1 + + cache_decorator = function_cache(expected_max_entries=2) + wrapper = cache_decorator(test_func) + wrapper(1, 1, t=1) + wrapper(1, 2, t=1) + # turn off + wrapper(1, 3, t=1) + # call 10 more times + for _ in range(10): + wrapper(1, 3, t=1) + + self.assertEqual(test['call_count'], 13) + + def test_cache_hit(self): + test = {'call_count': 0} + + def test_func(*args, **kwargs): + test['call_count'] += 1 + + cache_decorator = function_cache(expected_max_entries=20) + wrapper = cache_decorator(test_func) + wrapper(1, 1, t=1) + wrapper(1, 2, t=1) + # turn off + wrapper(1, 3, t=1) + # call 10 more times + for _ in range(10): + wrapper(1, 3, t=1) + self.assertEqual(test['call_count'], 3) diff --git a/premailer/tests/test_merge_style.py b/premailer/tests/test_merge_style.py new file mode 100644 index 0000000..411f7c9 --- /dev/null +++ b/premailer/tests/test_merge_style.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import, unicode_literals +import unittest +from premailer.merge_style import csstext_to_pairs, merge_styles + + +class TestMergeStyle(unittest.TestCase): + # test what is not cover in test_premailer + # should move them here + # smaller files are easier to work with + def test_csstext_to_pairs(self): + csstext = 'font-size:1px' + parsed_csstext = csstext_to_pairs(csstext) + self.assertEqual(('font-size', '1px'), parsed_csstext[0]) + + def 
test_inline_invalid_syntax(self): + # Invalid syntax does not raise + inline = '{color:pink} :hover{color:purple} :active{color:red}' + merge_styles(inline, [], []) diff --git a/premailer/tests/test_premailer.py b/premailer/tests/test_premailer.py new file mode 100644 index 0000000..32f45bb --- /dev/null +++ b/premailer/tests/test_premailer.py @@ -0,0 +1,2717 @@ +from __future__ import absolute_import, unicode_literals +import sys +import re +import unittest +import logging +from contextlib import contextmanager +if sys.version_info >= (3, ): # As in, Python 3 + from urllib.request import urlopen +else: # Python 2 + from urllib2 import urlopen + urlopen = urlopen # shut up pyflakes +from io import StringIO # Yes, the is an io lib in py2.x + +from nose.tools import eq_, ok_, assert_raises +import mock +from lxml.etree import fromstring, XMLSyntaxError + +from premailer.premailer import ( + transform, + Premailer, + merge_styles, + csstext_to_pairs, + ExternalNotFoundError, +) +from premailer.__main__ import main +import premailer.premailer # lint:ok + + +whitespace_between_tags = re.compile('>\s*<') + + +@contextmanager +def captured_output(): + new_out, new_err = StringIO(), StringIO() + old_out, old_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = new_out, new_err + yield sys.stdout, sys.stderr + finally: + sys.stdout, sys.stderr = old_out, old_err + + +@contextmanager +def provide_input(content): + old_stdin = sys.stdin + sys.stdin = StringIO(content) + try: + with captured_output() as (out, err): + yield out, err + finally: + sys.stdin = old_stdin + sys.stdin = StringIO(content) + + +class MockResponse(object): + + def __init__(self, content): + self.text = content + + +def compare_html(one, two): + one = one.strip() + two = two.strip() + one = whitespace_between_tags.sub('>\n<', one) + two = whitespace_between_tags.sub('>\n<', two) + one = one.replace('><', '>\n<') + two = two.replace('><', '>\n<') + for i, line in enumerate(one.splitlines()): + 
other = two.splitlines()[i] + if line.lstrip() != other.lstrip(): + eq_(line.lstrip(), other.lstrip()) + + +class Tests(unittest.TestCase): + + def shortDescription(self): + # most annoying thing in the world about nose + pass + + def test_merge_styles_basic(self): + inline_style = 'font-size:1px; color: red' + new = 'font-size:2px; font-weight: bold' + expect = 'font-size:1px;', 'font-weight:bold;', 'color:red' + result = merge_styles(inline_style, [csstext_to_pairs(new)], ['']) + for each in expect: + ok_(each in result) + + def test_merge_styles_with_class(self): + inline_style = 'color:red; font-size:1px;' + new, class_ = 'font-size:2px; font-weight: bold', ':hover' + + # because we're dealing with dicts (random order) we have to + # test carefully. + # We expect something like this: + # {color:red; font-size:1px} :hover{font-size:2px; font-weight:bold} + + result = merge_styles(inline_style, [csstext_to_pairs(new)], [class_]) + ok_(result.startswith('{')) + ok_(result.endswith('}')) + ok_(' :hover{' in result) + split_regex = re.compile('{([^}]+)}') + eq_(len(split_regex.findall(result)), 2) + expect_first = 'color:red', 'font-size:1px' + expect_second = 'font-weight:bold', 'font-size:2px' + for each in expect_first: + ok_(each in split_regex.findall(result)[0]) + for each in expect_second: + ok_(each in split_regex.findall(result)[1]) + + def test_merge_styles_non_trivial(self): + inline_style = ( + 'background-image:url("")' + ) + new = 'font-size:2px; font-weight: bold' + expect = ( + 'background-image:url("")', + 'font-size:2px;', + 'font-weight:bold' + ) + result = merge_styles(inline_style, [csstext_to_pairs(new)], ['']) + for each in expect: + ok_(each in result) + + def test_merge_styles_with_unset(self): + inline_style = 'color: red' + new = 'font-size: 10px; font-size: unset; font-weight: bold' + expect = 'font-weight:bold;', 'color:red' + css_new = csstext_to_pairs(new) + result = merge_styles( + inline_style, + [css_new], + [''], + 
remove_unset_properties=True, + ) + for each in expect: + ok_(each in result) + ok_('font-size' not in result) + + def test_basic_html(self): + """test the simplest case""" + + html = """ + + Title + + + +

Hi!

+

Yes!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+

Yes!

+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_remove_classes(self): + """test the simplest case""" + + html = """ + + Title + + + +

Yes!

+ + """ + + expect_html = """ + + Title + + +

Yes!

+ + """ + + p = Premailer(html, remove_classes=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_basic_html_shortcut_function(self): + """test the plain transform function""" + html = """ + + Title + + + +

Hi!

+

Yes!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+

Yes!

+ + """ + + result_html = transform(html) + compare_html(expect_html, result_html) + + def test_empty_style_tag(self): + """empty style tag""" + + html = """ + + + + + + + """ + + expect_html = """ + + + + + + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_include_star_selector(self): + """test the simplest case""" + + html = """ + + Title + + + +

Hi!

+

Yes!

+ + """ + + expect_html_not_included = """ + + Title + + +

Hi!

+

Yes!

+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html_not_included, result_html) + + expect_html_star_included = """ + + Title + + +

Hi!

+

Yes!

+ + """ + + p = Premailer(html, include_star_selectors=True) + result_html = p.transform() + + compare_html(expect_html_star_included, result_html) + + def test_mixed_pseudo_selectors(self): + """mixing pseudo selectors with straight forward selectors""" + + html = """ + + Title + + + +

+ Page +

+ + """ + + expect_html = """ + + Title + + + +

Page

+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_basic_html_with_pseudo_selector(self): + """test the simplest case""" + + html = """ + + +

Peter

+

Hej

+ + """ + + expect_html = """ + + + + +

Peter

+

Hej

+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_parse_style_rules(self): + p = Premailer('html') # won't need the html + func = p._parse_style_rules + rules, leftover = func(""" + h1, h2 { color:red; } + /* ignore + this */ + strong { + text-decoration:none + } + ul li { list-style: 2px; } + a:hover { text-decoration: underline } + """, 0) + + # 'rules' is a list, turn it into a dict for + # easier assertion testing + rules_dict = {} + rules_specificity = {} + for specificity, k, v in rules: + rules_dict[k] = v + rules_specificity[k] = specificity + + ok_('h1' in rules_dict) + ok_('h2' in rules_dict) + ok_('strong' in rules_dict) + ok_('ul li' in rules_dict) + + eq_(rules_dict['h1'], 'color:red') + eq_(rules_dict['h2'], 'color:red') + eq_(rules_dict['strong'], 'text-decoration:none') + eq_(rules_dict['ul li'], 'list-style:2px') + ok_('a:hover' not in rules_dict) + + # won't need the html + p = Premailer('html', exclude_pseudoclasses=True) + func = p._parse_style_rules + rules, leftover = func(""" + ul li { list-style: 2px; } + a:hover { text-decoration: underline } + """, 0) + + eq_(len(rules), 1) + specificity, k, v = rules[0] + eq_(k, 'ul li') + eq_(v, 'list-style:2px') + + eq_(len(leftover), 1) + k, v = leftover[0] + eq_((k, v), ('a:hover', 'text-decoration:underline'), (k, v)) + + def test_precedence_comparison(self): + p = Premailer('html') # won't need the html + rules, leftover = p._parse_style_rules(""" + #identified { color:blue; } + h1, h2 { color:red; } + ul li { list-style: 2px; } + li.example { color:green; } + strong { text-decoration:none } + div li.example p.sample { color:black; } + """, 0) + + # 'rules' is a list, turn it into a dict for + # easier assertion testing + rules_specificity = {} + for specificity, k, v in rules: + rules_specificity[k] = specificity + + # Last in file wins + ok_(rules_specificity['h1'] < rules_specificity['h2']) + # More elements wins + 
ok_(rules_specificity['strong'] < rules_specificity['ul li']) + # IDs trump everything + ok_(rules_specificity['div li.example p.sample'] < + rules_specificity['#identified']) + + # Classes trump multiple elements + ok_(rules_specificity['ul li'] < + rules_specificity['li.example']) + + def test_base_url_fixer(self): + """if you leave some URLS as /foo and set base_url to + 'http://www.google.com' the URLS become 'http://www.google.com/foo' + """ + html = ''' + + Title + + + + + + + Home + External + Subpage + Internal Link + + + ''' + + expect_html = ''' + + Title + + + + + + + Home + External + Subpage + Internal Link + + ''' + + p = Premailer( + html, + base_url='http://kungfupeople.com', + preserve_internal_links=True + ) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_base_url_with_path(self): + """if you leave some URLS as /foo and set base_url to + 'http://www.google.com' the URLS become 'http://www.google.com/foo' + """ + + html = ''' + + Title + + + + + Home + External + External 2 + Subpage + Internal Link + + + ''' + + expect_html = ''' + + Title + + + + + Home + External + External 2 + Subpage + Internal Link + + ''' + + p = Premailer(html, base_url='http://kungfupeople.com/base/', + preserve_internal_links=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_style_block_with_external_urls(self): + """ + From http://github.com/peterbe/premailer/issues/#issue/2 + + If you have + body { background:url(http://example.com/bg.png); } + the ':' inside '://' is causing a problem + """ + + html = """ + + Title + + + +

Hi!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+ + """.replace('exam\nple', 'example') + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_base_url_ignore_links(self): + """if you leave some URLS as /foo, set base_url to + 'http://www.google.com' and set disable_link_rewrites to True, the URLS + should not be changed. + """ + + html = ''' + + Title + + + + + Home + External + External 2 + Subpage + Internal Link + + + ''' + + expect_html = ''' + + Title + + + + + Home + External + External 2 + Subpage + Internal Link + + ''' + + p = Premailer(html, base_url='http://kungfupeople.com/base/', + disable_link_rewrites=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_shortcut_function(self): + # you don't have to use this approach: + # from premailer import Premailer + # p = Premailer(html, base_url=base_url) + # print p.transform() + # You can do it this way: + # from premailer import transform + # print transform(html, base_url=base_url) + + html = ''' + + + + +

Hi!

+ + ''' + + expect_html = ''' + + +

Hi!

+ + ''' + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def fragment_in_html(self, fragment, html, fullMessage=False): + if fullMessage: + message = '"{0}" not in\n{1}'.format(fragment, html) + else: + message = '"{0}" not in HTML'.format(fragment) + ok_(fragment in html, message) + + def test_css_with_pseudoclasses_included(self): + "Pick up the pseudoclasses too and include them" + + html = ''' + + + + + Special! + Page +

Paragraph

+ + ''' + + p = Premailer(html, exclude_pseudoclasses=False) + result_html = p.transform() + # because we're dealing with random dicts here we can't predict what + # order the style attribute will be written in so we'll look for + # things manually. + e = '

'\ + 'Paragraph

' + self.fragment_in_html(e, result_html, True) + + e = 'style="{color:red; border:1px solid green}' + self.fragment_in_html(e, result_html) + e = ' :visited{border:1px solid green}' + self.fragment_in_html(e, result_html) + e = ' :hover{text-decoration:none; border:1px solid green}' + self.fragment_in_html(e, result_html) + + def test_css_with_pseudoclasses_excluded(self): + "Skip things like `a:hover{}` and keep them in the style block" + + html = """ + + + + + Page +

Paragraph

+ + """ + + expect_html = """ + + + + +Page +

Paragraph

+ +""" + + p = Premailer(html, exclude_pseudoclasses=True) + result_html = p.transform() + + expect_html = whitespace_between_tags.sub('><', expect_html).strip() + result_html = whitespace_between_tags.sub('><', result_html).strip() + + expect_html = re.sub('}\s+', '}', expect_html) + result_html = result_html.replace('}\n', '}') + + eq_(expect_html, result_html) + # XXX + + def test_css_with_html_attributes(self): + """Some CSS styles can be applied as normal HTML attribute like + 'background-color' can be turned into 'bgcolor' + """ + + html = """ + + + + +

Text

+ + + + + +
Cell 1Cell 2
+ + """ + + expect_html = """ + + + +

Text

+ + + + + +
Cell 1Cell 2
+ + """.replace('vert\nical', 'vertical') + + p = Premailer(html, exclude_pseudoclasses=True) + result_html = p.transform() + + expect_html = re.sub('}\s+', '}', expect_html) + result_html = result_html.replace('}\n', '}') + + compare_html(expect_html, result_html) + + def test_css_disable_basic_html_attributes(self): + """Some CSS styles can be applied as normal HTML attribute like + 'background-color' can be turned into 'bgcolor' + """ + + html = """ + + + + +

Text

+ + + + + +
Cell 1Cell 2
+ + """ + + expect_html = """ + + + +

Text

+ + + + + +
Cell 1Cell 2
+ + """ + + p = Premailer( + html, + exclude_pseudoclasses=True, + disable_basic_attributes=['align', 'width', 'height'] + ) + result_html = p.transform() + + expect_html = re.sub('}\s+', '}', expect_html) + result_html = result_html.replace('}\n', '}') + + compare_html(expect_html, result_html) + + def test_apple_newsletter_example(self): + # stupidity test + import os + + html_file = os.path.join('premailer', 'tests', + 'test-apple-newsletter.html') + html = open(html_file).read() + + p = Premailer(html, exclude_pseudoclasses=False, + keep_style_tags=True, + strip_important=False) + result_html = p.transform() + ok_('' in result_html) + ok_('' in result_html) + + def test_mailto_url(self): + """if you use URL with mailto: protocol, they should stay as mailto: + when baseurl is used + """ + + html = """ + + Title + + + e-mail@example.com + + """ + + expect_html = """ + + Title + + + e-mail@example.com + + """ + + p = Premailer(html, base_url='http://kungfupeople.com') + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_tel_url(self): + """if you use URL with tel: protocol, it should stay as tel: + when baseurl is used + """ + + html = """ + + Title + + + 202-555-0113 + + """ + + p = Premailer(html, base_url='http://kungfupeople.com') + result_html = p.transform() + + compare_html(result_html, html) + + def test_uppercase_margin(self): + """Option to comply with outlook.com + + https://emailonacid.com/blog/article/email-development/outlook.com-does-support-margins + """ + + html = """ + +Title + + + +

a

+

+b +

+ +""" + + expect_html = """ + +Title + + +

a

+

+b +

+ +""" + + p = Premailer(html, capitalize_float_margin=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_strip_important(self): + """Get rid of !important. Makes no sense inline.""" + html = """ + + + + +

Paragraph

+ + + """ + expect_html = """ + + + +

Paragraph

+ +""" + + p = Premailer(html, strip_important=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_inline_wins_over_external(self): + html = """ + + + + +
Some text
+ + """ + + expect_html = """ + + + +
Some text
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_last_child(self): + html = """ + + + + +
First child
+
Last child
+ + """ + + expect_html = """ + + + +
First child
+
Last child
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_last_child_exclude_pseudo(self): + html = """ + + + + +
First child
+
Last child
+ + """ + + expect_html = """ + + + +
First child
+
Last child
+ + """ + + p = Premailer(html, exclude_pseudoclasses=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_mediaquery(self): + html = """ + + + + +
First div
+ + """ + + expect_html = """ + + + + +
First div
+ + """ + + p = Premailer(html, strip_important=False) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_child_selector(self): + html = """ + + + + +
First div
+ + """ + + expect_html = """ + + + +
First div
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_doctype(self): + html = ( + '' + """ + + + + + """ + ) + + expect_html = ( + '' + """ + + + + + """ + ) + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_prefer_inline_to_class(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_favour_rule_with_element_over_generic(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_favour_rule_with_class_over_generic(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_favour_rule_with_id_over_others(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_favour_rule_with_important_over_others(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ +""" + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_multiple_style_elements(self): + """Asserts that rules from multiple style elements + are inlined correctly.""" + + html = """ + + Title + + + + +

Hi!

+

Yes!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+

Yes!

+ + """.replace('deco\nration', 'decoration') + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_style_attribute_specificity(self): + """Stuff already in style attributes beats style tags.""" + + html = """ + + Title + + + +

Hi!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+ + """ + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_ignore_style_elements_with_media_attribute(self): + """Asserts that style elements with media attributes other than + 'screen' are ignored.""" + + html = """ + + Title + + + + + +

Hi!

+

Yes!

+ + """ + + expect_html = """ + + Title + + + +

Hi!

+

Yes!

+ + """.replace('deco\nration', 'decoration') + + p = Premailer(html) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_leftover_important(self): + """Asserts that leftover styles should be marked as !important.""" + + html = """ + + Title + + + + Hi! + + """ + + expect_html = """ + + Title + + + + Hi! + + """ + + p = Premailer(html, + keep_style_tags=True, + strip_important=False) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_basic_xml(self): + """Test the simplest case with xml""" + + html = """ + + Title + + + + test + + + """ + + expect_html = """ + + Title + + + test + + + """ + + p = Premailer(html, method="xml") + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_broken_xml(self): + """Test the simplest case with xml""" + + html = """ + + Title + <style type="text/css"> + img { border: none; } + </style> + </head> + <body> + <img src="test.png" alt="test"/> + </body> + """ + + p = Premailer(html, method="xml") + assert_raises( + XMLSyntaxError, + p.transform, + ) + + def test_xml_cdata(self): + """Test that CDATA is set correctly on remaining styles""" + + html = """<html> + <head> + <title>Title + + + + Test + + + """ + + expect_html = """ + + Title + + + + Test + + + """.replace('back\nground', 'background') + + p = Premailer(html, method="xml") + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_command_line_fileinput_from_stdin(self): + html = '

Title

' + expect_html = """ + + +

Title

+ + """ + + with provide_input(html) as (out, err): + main([]) + result_html = out.getvalue().strip() + + compare_html(expect_html, result_html) + + def test_command_line_fileinput_from_argument(self): + with captured_output() as (out, err): + main([ + '-f', + 'premailer/tests/test-apple-newsletter.html', + '--disable-basic-attributes=bgcolor' + ]) + + result_html = out.getvalue().strip() + + ok_('' in result_html) + ok_('' in result_html) + + def test_command_line_preserve_style_tags(self): + with captured_output() as (out, err): + main([ + '-f', + 'premailer/tests/test-issue78.html', + '--preserve-style-tags', + '--external-style=premailer/tests/test-external-styles.css', + ]) + + result_html = out.getvalue().strip() + + expect_html = """ + + + + + + + + +

h1

+

html

+ + + """.replace('col\nor', 'color').replace('applic\nation', 'application') + + compare_html(expect_html, result_html) + + # for completeness, test it once without + with captured_output() as (out, err): + main([ + '-f', + 'premailer/tests/test-issue78.html', + '--external-style=premailer/tests/test-external-styles.css', + ]) + + result_html = out.getvalue().strip() + expect_html = """ + + + + + + + +

h1

+

html

+ + + """.replace('co\nlor', 'color').replace('applic\nation', 'application') + + compare_html(expect_html, result_html) + + def test_multithreading(self): + """The test tests thread safety of merge_styles function which employs + thread non-safe cssutils calls. + The test would fail if merge_styles would have not been thread-safe """ + + import threading + import logging + THREADS = 30 + REPEATS = 100 + + class RepeatMergeStylesThread(threading.Thread): + """The thread is instantiated by test and run multiple + times in parallel.""" + exc = None + + def __init__(self, old, new, class_): + """The constructor just stores merge_styles parameters""" + super(RepeatMergeStylesThread, self).__init__() + self.old, self.new, self.class_ = old, new, class_ + + def run(self): + """Calls merge_styles in a loop and sets exc attribute + if merge_styles raises an exception.""" + for _ in range(0, REPEATS): + try: + merge_styles(self.old, self.new, self.class_) + except Exception as e: + logging.exception("Exception in thread %s", self.name) + self.exc = e + + inline_style = 'background-color:#ffffff;' + new = 'background-color:#dddddd;' + class_ = '' + + # start multiple threads concurrently; each + # calls merge_styles many times + threads = [ + RepeatMergeStylesThread( + inline_style, + [csstext_to_pairs(new)], + [class_] + ) + for _ in range(0, THREADS) + ] + for t in threads: + t.start() + + # wait until all threads are done + for t in threads: + t.join() + + # check if any thread raised exception while in merge_styles call + exceptions = [t.exc for t in threads if t.exc is not None] + eq_(exceptions, []) + + def test_external_links(self): + """Test loading stylesheets via link tags""" + + html = """ + + Title + + + + + + +

Hello

+

World

+

Test

+ Link + + """.replace( + 'applic\naction', 'application' + ).replace('style\nsheet', 'stylesheet') + + expect_html = """ + + Title + + + + +

Hello

+

World

+

Test

+ Link + + """.replace('applic\naction', 'application') + + p = Premailer( + html, + strip_important=False + ) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_external_links_unfindable(self): + """Test loading stylesheets that can't be found""" + + html = """ + + Title + + + + + +

Hello

+

World

+

Test

+ Link + + """ + + p = Premailer( + html, + strip_important=False + ) + assert_raises( + ExternalNotFoundError, + p.transform, + ) + + def test_external_styles_and_links(self): + """Test loading stylesheets via both the 'external_styles' + argument and link tags""" + + html = """ + + + + + +

Hello

+

Hello

+ Hello + + """ + + expect_html = """ + + + + + +

Hello

+

Hello

+ Hello + + """.replace('cont\nent', 'content') + + p = Premailer( + html, + strip_important=False, + external_styles='test-external-styles.css', + base_path='premailer/tests/') + result_html = p.transform() + + compare_html(expect_html, result_html) + + @mock.patch('premailer.premailer.requests') + def test_load_external_url(self, mocked_requests): + 'Test premailer.premailer.Premailer._load_external_url' + faux_response = 'This is not a response' + faux_uri = 'https://example.com/site.css' + mocked_requests.get.return_value = MockResponse(faux_response) + p = premailer.premailer.Premailer('

A paragraph

') + r = p._load_external_url(faux_uri) + + mocked_requests.get.assert_called_once_with(faux_uri) + eq_(faux_response, r) + + def test_css_text(self): + """Test handling css_text passed as a string""" + + html = """ + + + +

Hello

+

Hello

+ Hello + + """ + + expect_html = """ + + + + +

Hello

+

Hello

+ Hello + + """ + + css_text = """ + h1 { + color: brown; + } + h2 { + color: green; + } + a { + color: pink; + } + @media all and (max-width: 320px) { + h1 { + color: black; + } + } + + """ + + p = Premailer( + html, + strip_important=False, + css_text=[css_text]) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_css_text_with_only_body_present(self): + """Test handling css_text passed as a string when no or + is present""" + + html = """ +

Hello

+

Hello

+ Hello + """ + + expect_html = """ + + + + +

Hello

+

Hello

+ Hello + + """ + + css_text = """ + h1 { + color: brown; + } + h2 { + color: green; + } + a { + color: pink; + } + @media all and (max-width: 320px) { + h1 { + color: black; + } + } + """ + + p = Premailer( + html, + strip_important=False, + css_text=css_text) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_css_disable_leftover_css(self): + """Test handling css_text passed as a string when no or + is present""" + + html = """ +

Hello

+

Hello

+ Hello + """ + + expect_html = """ + +

Hello

+

Hello

+ Hello + + """ + + css_text = """ + h1 { + color: brown; + } + h2 { + color: green; + } + a { + color: pink; + } + @media all and (max-width: 320px) { + h1 { + color: black; + } + } + """ + + p = Premailer( + html, + strip_important=False, + css_text=css_text, + disable_leftover_css=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + @staticmethod + def mocked_urlopen(url): + 'The standard "response" from the "server".' + retval = '' + if 'style1.css' in url: + retval = "h1 { color: brown }" + elif 'style2.css' in url: + retval = "h2 { color: pink }" + elif 'style3.css' in url: + retval = "h3 { color: red }" + return retval + + @mock.patch.object(Premailer, '_load_external_url') + def test_external_styles_on_http(self, mocked_pleu): + """Test loading styles that are genuinely external""" + + html = """ + + + + + + +

Hello

+

World

+

World

+ + """ + mocked_pleu.side_effect = self.mocked_urlopen + p = Premailer(html) + result_html = p.transform() + + # Expected values are tuples of the positional values (as another + # tuple) and the keyword arguments (which are all null), hence the + # following Lisp-like explosion of brackets and commas. + expected_args = [(('https://www.com/style1.css',),), + (('http://www.com/style2.css',),), + (('http://www.com/style3.css',),)] + eq_(expected_args, mocked_pleu.call_args_list) + + expect_html = """ + + + +

Hello

+

World

+

World

+ + """ + compare_html(expect_html, result_html) + + @mock.patch.object(Premailer, '_load_external_url') + def test_external_styles_on_https(self, mocked_pleu): + """Test loading styles that are genuinely external""" + + html = """ + + + + + + +

Hello

+

World

+

World

+ + """ + + mocked_pleu.side_effect = self.mocked_urlopen + p = Premailer(html, base_url='https://www.peterbe.com') + result_html = p.transform() + + expected_args = [(('https://www.com/style1.css',),), + (('https://www.com/style2.css',),), + (('https://www.peterbe.com/style3.css',),)] + eq_(expected_args, mocked_pleu.call_args_list) + expect_html = """ + + + +

Hello

+

World

+

World

+ + """ + compare_html(expect_html, result_html) + + @mock.patch.object(Premailer, '_load_external_url') + def test_external_styles_with_base_url(self, mocked_pleu): + """Test loading styles that are genuinely external if you use + the base_url""" + + html = """ + + + + +

Hello

+ + """ + mocked_pleu.return_value = "h1 { color: brown }" + p = Premailer(html, base_url='http://www.peterbe.com/') + result_html = p.transform() + expected_args = [(('http://www.peterbe.com/style.css',),), ] + eq_(expected_args, mocked_pleu.call_args_list) + + expect_html = """ + + + +

Hello

+ + """ + compare_html(expect_html, result_html) + + def test_disabled_validator(self): + """test disabled_validator""" + + html = """ + + Title + + + +

Hi!

+

Yes!

+ + """ + + expect_html = """ + + Title + + +

Hi!

+

Yes!

+ + """ + + p = Premailer(html, disable_validation=True) + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_comments_in_media_queries(self): + """CSS comments inside a media query block should not be a problem""" + html = """ + + + + Document + + + + """ + + p = Premailer(html, disable_validation=True) + result_html = p.transform() + ok_('/* comment */' in result_html) + + def test_fontface_selectors_with_no_selectortext(self): + """ + @font-face selectors are weird. + This is a fix for https://github.com/peterbe/premailer/issues/71 + """ + html = """ + + + + Document + + + + """ + + p = Premailer(html, disable_validation=True) + p.transform() # it should just work + + def test_keyframe_selectors(self): + """ + keyframes shouldn't be a problem. + """ + html = """ + + + + Document + + + + """ + + p = Premailer(html, disable_validation=True) + p.transform() # it should just work + + def test_capture_cssutils_logging(self): + """you can capture all the warnings, errors etc. from cssutils + with your own logging. """ + html = """ + + + + Document + + + + """ + + mylog = StringIO() + myhandler = logging.StreamHandler(mylog) + p = Premailer( + html, + cssutils_logging_handler=myhandler, + ) + p.transform() # it should work + eq_( + mylog.getvalue(), + 'CSSStylesheet: Unknown @rule found. [2:13: @keyframes]\n' + ) + + # only log errors now + mylog = StringIO() + myhandler = logging.StreamHandler(mylog) + p = Premailer( + html, + cssutils_logging_handler=myhandler, + cssutils_logging_level=logging.ERROR, + ) + p.transform() # it should work + eq_(mylog.getvalue(), '') + + def test_type_test(self): + """test the correct type is returned""" + + html = """ + + Title + + + +

Hi!

+

Yes!

+ + """ + + p = Premailer(html) + result = p.transform() + eq_(type(result), type("")) + + html = fromstring(html) + etree_type = type(html) + + p = Premailer(html) + result = p.transform() + ok_(type(result) != etree_type) + + def test_ignore_some_inline_stylesheets(self): + """test that it's possible to put a `data-premailer="ignore"` + attribute on a + + + +

Hello

+

World

+ + """ + + expect_html = """ + + Title + + + +

Hello

+

World

+ + """ + + p = Premailer(html, disable_validation=True) + result_html = p.transform() + compare_html(expect_html, result_html) + + @mock.patch('premailer.premailer.warnings') + def test_ignore_some_incorrectly(self, warnings_mock): + """You can put `data-premailer="ignore"` but if the attribute value + is something we don't recognize you get a warning""" + + html = """ + + Title + + + +

Hello

+

World

+ + """ + + expect_html = """ + + Title + + +

Hello

+

World

+ + """ + + p = Premailer(html, disable_validation=True) + result_html = p.transform() + warnings_mock.warn.assert_called_with( + "Unrecognized data-premailer attribute ('blah')" + ) + + compare_html(expect_html, result_html) + + def test_ignore_some_external_stylesheets(self): + """test that it's possible to put a `data-premailer="ignore"` + attribute on a tag and it gets left alone (except that + the attribute gets removed)""" + + # Know thy fixtures! + # The test-external-links.css has a `h1{color:blue}` + # And the test-external-styles.css has a `h1{color:brown}` + html = """ + + Title + + + + +

Hello

+ + """ + + # Note that the `test-external-links.css` gets converted to a inline + # style sheet. + expect_html = """ + +Title + + + + +

Hello

+ +""".replace('style\nsheet', 'stylesheet') + + p = Premailer(html, disable_validation=True) + result_html = p.transform() + compare_html(expect_html, result_html) + + def test_turnoff_cache_works_as_expected(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+ + """ + + p = Premailer(html, cache_css_parsing=False) + self.assertFalse(p.cache_css_parsing) + # run one time first + p.transform() + result_html = p.transform() + + compare_html(expect_html, result_html) + + def test_links_without_protocol(self): + """If the base URL is set to https://example.com and your html + contains ... then the URL to point to + is "https://otherdomain.com/" not "https://example.com/file.css" + """ + html = """ + + + + + + """ + + expect_html = """ + + + + + + """ + + p = Premailer(html, base_url='https://www.peterbe.com') + result_html = p.transform() + compare_html(expect_html.format(protocol="https"), result_html) + + p = Premailer(html, base_url='http://www.peterbe.com') + result_html = p.transform() + compare_html(expect_html.format(protocol="http"), result_html) + + # Because you can't set a base_url without a full protocol + p = Premailer(html, base_url='www.peterbe.com') + assert_raises(ValueError, p.transform) + + def test_align_float_images(self): + + html = """ + + Title + + + +

text + text + text + + """ + + expect_html = """ + +Title + + +

text + text + text +

+ +""" + + p = Premailer(html, align_floating_images=True) + result_html = p.transform() + compare_html(expect_html, result_html) + + def test_remove_unset_properties(self): + html = """ + + + + +
+ + """ + + expect_html = """ + + + +
+
+ + """ + + p = Premailer(html, remove_unset_properties=True) + self.assertTrue(p.remove_unset_properties) + result_html = p.transform() + compare_html(expect_html, result_html) + + def test_six_color(self): + r = Premailer.six_color('#cde') + e = '#ccddee' + self.assertEqual(e, r) + + def test_3_digit_color_expand(self): + 'Are 3-digit color values expanded into 6-digits for IBM Notes' + html = """ + + +

color test

+

+ This is a test of color handling. +

+ +""" + expect_html = """ + + + +

color test

+

+ This is a test of color handling. +

+ +""" + p = Premailer(html, remove_unset_properties=True) + result_html = p.transform() + compare_html(expect_html, result_html) + + def test_inline_important(self): + 'Are !important tags preserved inline.' + + html = """ + + + + + +
blah
+ +""" + + expect_html = """ + + + + + +
blah
+ +""" + p = Premailer( + html, + remove_classes=False, + keep_style_tags=True, + strip_important=False + ) + result_html = p.transform() + compare_html(expect_html, result_html) + + def test_pseudo_selectors_without_selector(self): + """Happens when you have pseudo selectors without an actual selector. + Which means it's not possible to find it in the DOM. + + For example: + + + + Semantic-UI uses this in its normalizer. + + Original issue: https://github.com/peterbe/premailer/issues/184 + """ + + html = """ + + +

Hey

+ + """ + + expect_html = """ + + + + + +

Hey

+ + + """ + p = Premailer( + html, + exclude_pseudoclasses=False, + keep_style_tags=True, + ) + result_html = p.transform() + compare_html(expect_html, result_html) diff --git a/premailer/tests/test_utils.py b/premailer/tests/test_utils.py new file mode 100644 index 0000000..467608d --- /dev/null +++ b/premailer/tests/test_utils.py @@ -0,0 +1,19 @@ +import unittest + +from premailer.premailer import capitalize_float_margin + + +class UtilsTestCase(unittest.TestCase): + def testcapitalize_float_margin(self): + self.assertEqual( + capitalize_float_margin('margin:1em'), + 'Margin:1em') + self.assertEqual( + capitalize_float_margin('margin-left:1em'), + 'Margin-left:1em') + self.assertEqual( + capitalize_float_margin('float:right;'), + 'Float:right;') + self.assertEqual( + capitalize_float_margin('float:right;color:red;margin:0'), + 'Float:right;color:red;Margin:0') From d72c197f2ae1dd65fbc418f700a5e80fd16179f7 Mon Sep 17 00:00:00 2001 From: Peter Feichtinger Date: Sun, 23 Sep 2018 19:19:38 +0200 Subject: [PATCH 3/5] Preprocess: add premailer option to drop style tags This adds an option to premailer for dropping style tags even if there are leftover rules. The cssless preprocessing would drop those tags after premailer processing anyway, this way we don't even keep them. 
--- commands/preprocess_cssless.py | 9 +++------ premailer/premailer.py | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/commands/preprocess_cssless.py b/commands/preprocess_cssless.py index 7162c83..e930f0e 100644 --- a/commands/preprocess_cssless.py +++ b/commands/preprocess_cssless.py @@ -20,7 +20,6 @@ from lxml import html from lxml import etree from io import StringIO -from lxml.etree import strip_elements import logging import re import os @@ -35,7 +34,6 @@ def preprocess_html_merge_cssless(src_path, dst_path): root = etree.fromstring(stripped, parser) output = preprocess_html_merge_css(root, src_path) - strip_style_tags(root) remove_display_none(root) convert_span_tables_to_tr_td(root) convert_inline_block_elements_to_table(root) @@ -70,14 +68,13 @@ def preprocess_html_merge_css(root, src_path): with warnings.catch_warnings(): warnings.simplefilter("ignore") premailer = Premailer(root, base_url=src_path, - disable_link_rewrites=True, remove_classes=True) + disable_link_rewrites=True, + remove_classes=True, + drop_style_tags=True) root = premailer.transform().getroot() return output.getvalue() -def strip_style_tags(root): - strip_elements(root, 'style') - def needs_td_wrapper(element): # element has table:row if len(element.getchildren()) == 0: diff --git a/premailer/premailer.py b/premailer/premailer.py index fb2bc08..a015a04 100644 --- a/premailer/premailer.py +++ b/premailer/premailer.py @@ -121,6 +121,7 @@ def __init__(self, html, base_url=None, preserve_inline_attachments=True, exclude_pseudoclasses=True, keep_style_tags=False, + drop_style_tags=False, include_star_selectors=False, remove_classes=False, capitalize_float_margin=False, @@ -156,6 +157,9 @@ def __init__(self, html, base_url=None, # whether to delete the