Source code for ftp.decoders.parser_csv

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
"""
This submodule is used to parse metadata from CSV_ (``.csv``) files.

.. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values

Example of the valid data::

    ISBN knihy;978-80-87270-99-8
    Vazba knihy;brož.
    Nazev knihy;whatever.csv
    Misto vydani;Praha
    Nakladatel;Garda
    Datum vydani;IX.12
    Poradi vydani;1
    Zpracovatel zaznamu;Franta Putsalek

See :doc:`/workflow/required` for list of required fields.
"""
#= Imports ====================================================================
import csv

import validator
from meta_exceptions import MetaParsingException


#= Functions & objects ========================================================
def _remove_quotes(word):
    """
    Remove quotes from `word` if the word starts and ands with quotes (" or ').
    """
    if not word or len(word) <= 2:
        return word

    if word[0] == word[-1] and word[0] in ["'", '"']:
        return word[1:-1]

    return word


def decode(data):
    """
    Handles decoding of the CSV `data`.

    Blank cells and rows containing only blank cells are skipped. Every other
    row has to contain exactly two non-blank cells, which are stripped,
    decoded from UTF-8 (when they are byte strings) and unquoted. Resulting
    list of pairs is then passed to ``validator.check_structure()``.

    Args:
        data (str): Data which will be decoded.

    Returns:
        dict: Dictionary with decoded data (whatever is returned by
              ``validator.check_structure()`` - TODO confirm the exact type).

    Raises:
        MetaParsingException: If the `data` can't be parsed as CSV, or if
            some row doesn't contain exactly two non-blank cells.
    """
    # try to guess dialect of the csv file - this is only best effort, when
    # the guess fails, `dialect` stays None
    dialect = None
    try:
        dialect = csv.Sniffer().sniff(data)
    except Exception:
        pass

    # parse data with csv parser
    try:
        lines = data.splitlines()  # used later in the error message
        reader = csv.reader(lines, dialect)

        # csv.reader is lazy - rows have to be consumed inside of this `try`,
        # or the parsing errors would be raised later, outside of its reach.
        # `line_num` is the (1-based) number of the last physical line
        # which was read to produce the row.
        rows = [(reader.line_num, row) for row in reader]
    except Exception as e:
        # broad on purpose - callers expect MetaParsingException for any kind
        # of unusable input (for example for non-string `data`)
        raise MetaParsingException("Can't parse your CSV data: %s" % e)

    # make sure, that data are meaningful
    decoded = []
    for line_num, row in rows:
        cells = [cell for cell in row if cell.strip()]

        if not cells:
            continue

        if len(cells) != 2:
            raise MetaParsingException(
                "Bad number of elements - line %d:\n\t%s\n" % (
                    line_num,
                    lines[line_num - 1],
                )
            )

        pair = []
        for cell in cells:
            # remove trailing spaces, decode to utf-8
            cell = cell.strip()
            if isinstance(cell, bytes):  # `bytes` is `str` in python 2
                cell = cell.decode("utf-8")

            # remove quotes if the csv.Sniffer failed to decode right `dialect`
            pair.append(_remove_quotes(cell))

        decoded.append(pair)

    # apply additional checks to data
    return validator.check_structure(decoded)