Skip to content

validator_tools.py

clean_error_msg(msg)

Cleans up the msg to remove unwanted details.

This function removes new lines, added spaces, and strips spaces.

Parameters:

Name Type Description Default
msg str

a string message.

required

Returns:

Name Type Description
msg str

cleaned of all unnecessary text.

Source code in webcode_tk/validator_tools.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def clean_error_msg(msg: str) -> str:
    """Tidies a validator message so it reads as a single clean line.

    Newlines are deleted, runs of two or more spaces collapse to one
    space, a space sitting directly before a colon is dropped, curly
    double quotes become straight double quotes, and surrounding
    whitespace is stripped.

    Args:
        msg: a string message.

    Returns:
        msg: the same message with the clean-up steps above applied.
    """
    single_line = msg.replace("\n", "")
    collapsed = re.sub(r"[ ]{2,}", " ", single_line)
    tidy = collapsed.replace(" :", ":")
    # normalise typographic quotes to plain ASCII double quotes
    for curly_quote in ("“", "”"):
        tidy = tidy.replace(curly_quote, '"')
    return tidy.strip()

get_css_errors_list(val_results)

Extracts a list of CSS errors from the CSS validator results.

This function takes a ResultSet of Tags from the validate_css() function, and extracts all errors as a list of strings.

Parameters:

Name Type Description Default
val_results ResultSet

the results from the CSS validator (jigsaw.w3.org/css-validator)

required

Returns:

Name Type Description
error_list list

a list of any error messages from the ResultSet. Each error message is in string format.

Source code in webcode_tk/validator_tools.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def get_css_errors_list(val_results: bs4.ResultSet) -> list:
    """Pulls the CSS error messages out of the CSS validator results.

    The ResultSet is turned back into markup, re-parsed, and every
    table cell (`td`) is collected. The text of the third cell in each
    group of three is cleaned with clean_error_msg() and returned.

    Args:
        val_results: the results from the CSS validator
            (jigsaw.w3.org/css-validator), as returned by
            validate_css().

    Returns:
        error_list: a list of any error messages from the
            ResultSet. Each error message is in string format.
    """
    parsed = bs4.BeautifulSoup(str(val_results), "lxml")
    cells = parsed.find_all("td")
    # every 3rd TD has the error message: indexes 2, 5, 8, ...
    return [clean_error_msg(cell.text) for cell in cells[2::3]]

get_html_file_names(dir_path='.' + os.sep + 'project')

Gets a list of all html documents from directory path.

This function takes a directory path (if provided) in string form and returns a list of all HTML document paths from that directory. If no path is provided, it assumes there's a project folder in the root of the project folder, and it will check there.

Parameters:

Name Type Description Default
dir_path str

a path to the directory you want to check. It has a default directory of project/ in case no directory is provided.

'.' + sep + 'project'

Returns:

Name Type Description
names list

a list of filenames as relative links to the HTML documents in the directory.

Source code in webcode_tk/validator_tools.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def get_html_file_names(dir_path=r"." + os.sep + "project") -> list:
    """Gets a list of all html documents from directory path.

    This function takes a directory path (if provided) in string form
    and returns a list of all HTML document paths from that directory,
    including any nested folders. If no path is provided, it assumes
    there's a `project` folder in the current working directory, and
    it will check there.

    Args:
        dir_path (str): a path to the directory you want to check.
            It has a default directory of `project/` in case no
            directory is provided. An empty string yields an empty
            list.

    Returns:
        names: a list of paths (each one `dir_path` or a nested folder
            joined to the filename) for every file whose name ends in
            `.html`.
    """
    names = []

    # an empty path has nothing to walk (and no last character to test)
    if not dir_path:
        return names

    # remove final slash if present, but leave a bare "/" alone so the
    # path is never reduced to an empty string
    if len(dir_path) > 1 and dir_path.endswith("/"):
        dir_path = dir_path[:-1]

    for subdir, _dirs, files in os.walk(dir_path):
        # if using posix (forward slash), use posix
        # otherwise, use the os.sep (for Windows paths)
        separator = "/" if "/" in subdir else os.sep
        for filename in files:
            if filename.endswith(".html"):
                names.append(subdir + separator + filename)
    return names

get_markup_validity(file_path)

Returns a list of errors from a file.

This function takes the contents of a file and runs it through the W3C validator and returns a list of warnings and errors from the validator in a dictionary object. If there are no warnings or errors, it returns an empty list. It also checks the response code, and if it's not 200, then it returns an alert message (in the form of a list).

Parameters:

Name Type Description Default
file_path str

the relative path to an HTML or CSS document (in relationship to the root of the python project).

required

Returns:

Name Type Description
errors list

a list of dictionary types (converted from the JSON response from the validator).

Source code in webcode_tk/validator_tools.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def get_markup_validity(file_path: str) -> list:
    """Returns a list of errors from a file.

    This function posts the contents of a file to the
    [W3C validator](https://validator.w3.org/nu/?out=json) and
    returns the list of warnings and errors reported by the validator,
    each one a dictionary. If there are no warnings or errors, it
    returns an empty list. If the request fails, the response code is
    not 200, or the response body is not valid JSON, it returns a
    single alert message (in the form of a one-item list) instead.

    Args:
        file_path: the relative path to an HTML or CSS document (in
            relationship to the root of the python project).

    Returns:
        errors: a list of dictionary types (converted from the JSON
            response from the validator).

    Raises:
        OSError: if `file_path` cannot be opened for reading.
    """
    # what callers get back whenever the validator cannot be consulted
    alert = [
        {
            "type": "ALERT!",
            "lastLine": "NA",
            "lastColumn": "NA",
            "firstColumn": "NA",
            "message": "Problems connecting with the validator - "
            "probably no connection",
            "extract": "NA",
            "hiliteStart": "NA",
            "hiliteLength": "NA",
        }
    ]
    headers = {
        "content-type": "text/html; charset=utf-8",
        "Accept-Charset": "UTF-8",
    }

    with open(file_path, "rb") as payload:
        try:
            r = requests.post(w3cURL, data=payload, headers=headers)
        except requests.RequestException:
            # no connection, DNS failure, timeout, etc.
            return alert

    # raise the alarm if the response code is not 200; check this
    # before decoding because an error page is not guaranteed to be JSON
    if r.status_code != 200:
        return alert

    try:
        messages = r.json().get("messages")
    except ValueError:
        # the body was not valid JSON
        return alert

    # a missing "messages" key is treated the same as "no messages"
    return messages or []

get_num_errors(report)

Gets the number of errors from a list.

Parameters:

Name Type Description Default
report list

a list of error messages.

required

Returns:

Name Type Description
num_errors int

how many error messages there are.

Source code in webcode_tk/validator_tools.py
18
19
20
21
22
23
24
25
26
27
def get_num_errors(report: list) -> int:
    """Counts the entries in an error report.

    Args:
        report: a list of error messages.

    Returns:
        num_errors: how many error messages there are.
    """
    return len(report)

get_num_html_files(dir_path='.' + os.sep + 'project')

Returns the number of HTML documents in project folder.

This function will look into the project directory (dir_path or the default project location, which is "project" inside of the root Python project folder). It will return the number of all HTML documents (including those in folders nested inside of dir_path).

Parameters:

Name Type Description Default
dir_path str

the path to the folder you want to check. It has a default location of project (inside the root folder of your python project).

'.' + sep + 'project'

Returns:

Name Type Description
num_html_files int

the number of HTML documents within all folders of the provided (or default) project folder.

Source code in webcode_tk/validator_tools.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def get_num_html_files(dir_path=r"." + os.sep + "project") -> int:
    """Counts the HTML documents in a project folder.

    The count is the length of the list produced by
    get_html_file_names() for `dir_path`, so it includes HTML
    documents in folders nested inside of `dir_path`.

    Args:
        dir_path (str): the path to the folder you want to check. It
            has a default location of `project` (relative to the
            current directory).

    Returns:
        num_html_files: the number of HTML documents within all
            folders of the provided (or default) project folder.
    """
    return len(get_html_file_names(dir_path))

get_num_markup_errors(markup_response)

Gets the number of markup errors (not warnings).

This function sifts through the online validator response and counts the number of errors only (ignores any warnings).

Parameters:

Name Type Description Default
markup_response list

a list of markup errors and warnings from the online validator.

required

Returns:

Name Type Description
count int

the number of errors in the validator response.

Source code in webcode_tk/validator_tools.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def get_num_markup_errors(markup_response: list) -> int:
    """Counts the markup errors (not warnings) in a validator response.

    Only entries whose "type" value is exactly "error" are counted;
    every other entry is ignored.

    Args:
        markup_response: a list of markup errors and warnings from the
            online validator. Each entry must have a "type" key.

    Returns:
        count: the number of errors in the validator response.
    """
    return sum(1 for entry in markup_response if entry["type"] == "error")

get_num_markup_warnings(markup_errors)

Gets the number of markup warnings (not errors).

This function sifts through the online validator response and counts the number of warnings only (ignores any errors).

Parameters:

Name Type Description Default
markup_errors list

a list of markup errors and warnings from the online validator.

required

Returns:

Name Type Description
count int

the number of warnings in the validator response.

Source code in webcode_tk/validator_tools.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def get_num_markup_warnings(markup_errors: list) -> int:
    """Counts the markup warnings (not errors) in a validator response.

    Only entries whose "type" value is exactly "info" are counted;
    every other entry is ignored.

    Args:
        markup_errors: a list of markup errors and warnings from the
            online validator. Each entry must have a "type" key.

    Returns:
        count: the number of warnings in the validator response.
    """
    # each comparison is a bool, and summing bools yields an int tally
    return sum(entry["type"] == "info" for entry in markup_errors)

get_project_validation(project_dir, type='html')

Returns a report on HTML or CSS validation per HTML file.

You choose the project folder and the type (html or css), and it will return a list of per-file errors.

Source code in webcode_tk/validator_tools.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def get_project_validation(project_dir: str, type="html") -> list:
    """Returns a report on HTML or CSS validation per project file.

    You choose the project folder and the type (html or css), and it
    will return a list of per-file results.

    With `type="html"` only HTML documents are checked, using
    get_markup_validity(). Otherwise CSS files are checked with
    validate_css(), and, when `type` is "css", so is the content of
    any `style` element found in an HTML document.

    Args:
        project_dir: the folder whose files should be validated.
        type: which validation to run, "html" (the default) or "css".

    Returns:
        report: a list of strings. If any file failed, it holds one
            "fail: ..." line per failing file. If nothing failed, it
            holds one "pass: ..." line per validated file, or a single
            "fail: no files present to validate" line when no file
            was validated at all.
    """
    report = []
    passing_files = []
    all_files = clerk.get_all_project_files(project_dir)
    for file in all_files:
        errors = []
        file_type = clerk.get_file_type(file)
        filename = clerk.get_file_name(file)

        if type == "html":
            # markup mode only concerns HTML documents; stylesheets must
            # not be sent to the CSS validator or reported as HTML passes
            if file_type != "html":
                continue
            errors = get_markup_validity(file)
            if errors:
                report.append(
                    f"fail: {filename} has {len(errors)} validation errors."
                )
            else:
                passing_files.append(filename)
            continue

        # CSS embedded in an HTML document's style element
        if file_type == "html" and type == "css":
            style_tag = html.get_elements("style", file)
            if style_tag:
                code = html.get_element_content(style_tag)
                result = validate_css(code)
                errors_list = get_css_errors_list(result)
                if errors_list:
                    errors += errors_list
                else:
                    passing_files.append(filename)

        # standalone stylesheet
        if file_type == "css":
            code = clerk.file_to_string(file)
            result = validate_css(code)
            errors_list = get_css_errors_list(result)
            if errors_list:
                errors += errors_list
            else:
                passing_files.append(filename)

        if errors:
            report.append(
                f"fail: {filename} has {len(errors)} css errors."
            )

    # passing files are only listed when there are no failures to report
    if not report:
        if passing_files:
            for passing_file in passing_files:
                msg = f"pass: {passing_file} passes {type.upper()} validation"
                report.append(msg)
        else:
            report.append("fail: no files present to validate")
    # TODO - make sure this covers all scenarios
    return report

is_css_valid(validator_results)

Checks to make sure CSS code is valid

Source code in webcode_tk/validator_tools.py
216
217
218
219
220
def is_css_valid(validator_results) -> bool:
    """Checks to make sure CSS code is valid.

    The first item of the validator results is parsed and searched for
    an element whose id is "congrats"; the CSS is considered valid
    only when such an element is found.

    Args:
        validator_results: the results from validate_css(), a sequence
            whose first item holds the validator's results markup.

    Returns:
        True if the "congrats" element is present, otherwise False.
        Empty (or missing) results are reported as not valid.
    """
    # nothing came back from the validator, so validity cannot be confirmed
    if not validator_results:
        return False
    # create a soup of validator results
    soup = BeautifulSoup(str(validator_results[0]), "html.parser")
    return bool(soup.find(id="congrats"))

validate_css(css_code)

Validates CSS and returns the results from the css-validator.

This function will send any CSS code as a string to the W3.org css validator using a mechanicalsoup browser, and it will return the validator results as a ResultSet (a list of query results in the form of bs4 Tags).

Parameters:

Name Type Description Default
css_code str

CSS code in the form of a string.

required

Returns:

Name Type Description
results ResultSet

A ResultSet of Tag objects.

Source code in webcode_tk/validator_tools.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def validate_css(css_code: str) -> bs4.ResultSet:
    """Validates CSS and returns the results from the css-validator.

    This function will send any CSS code as a string to the W3.org
    css validator using a mechanicalsoup browser, and it will return
    the validator results as a ResultSet (a list of query results in
    the form of bs4 Tags).

    If the W3.org validator does not respond OK, css-validator.org is
    tried instead. If neither responds OK, or anything goes wrong
    while talking to the validator, the results are read from the
    local "no connection" page instead.

    Args:
        css_code: CSS code in the form of a string.

    Returns:
        results: A ResultSet of Tag objects.
    """
    # None means "no answer from a validator yet"; an empty ResultSet
    # from select() is still a real answer and is returned as-is
    results = None
    try:
        response = browser.open("https://jigsaw.w3.org/css-validator")
        if not response.ok:
            response = browser.open("https://css-validator.org/")
        if response.ok:
            # Fill-in the search form based on css_code
            browser.select_form("#validate-by-input form")
            browser["text"] = css_code
            browser.submit_selected()
            results = browser.get_current_page().select("#results_container")
    except Exception:
        # deliberate best effort: any failure falls back to the offline page
        results = None

    if results is None:
        # Convert the file "no_css_connection.html" into a soup tag object
        no_connection_code = clerk.file_to_string(
            "webanalyst/no_css_connection.html"
        )
        soup = BeautifulSoup(no_connection_code, "lxml")
        # Convert string to result set
        results = soup.select("#results_container")
    return results