Skip to content Skip to sidebar Skip to footer

How To Remove String Unicode From List

I am trying to remove the Unicode `u'...'` prefix marks from the strings in my list. The list is a list of actors from this site: http://www.boxofficemojo.com/yearly/chart/?yr=2013&p=.htm. I…

Solution 1:

If you provide a small example that reproduces the problem, it is much easier to correct your mistakes. Lacking that, here's an example, with the UnicodeWriter taken from the Python 2 csv module documentation. Just make sure your data is a list of lists of Unicode strings:

#!python2
#coding:utf8
import csv
import cStringIO
import codecs

# Sample rows for the demo: a header row followed by two records.
# Every cell is a unicode object, which is what UnicodeWriter.writerow
# expects (it calls .encode("utf-8") on each cell).
data = [
    [u'Chinese', u'English'],
    [u'马克', u'Mark'],
    [u'你好', u'Hello'],
]

class UnicodeWriter:
    """
    CSV writer for rows of unicode strings (Python 2).

    Each row is first rendered as UTF-8 CSV text into an in-memory
    buffer, then re-encoded into the requested target encoding and
    written to the underlying stream "f".

    f        -- object with a write() method that receives encoded bytes
    dialect  -- csv dialect handed to csv.writer (default: csv.excel)
    encoding -- target encoding of the output (default: "utf-8-sig")
    kwds     -- extra formatting options forwarded to csv.writer
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
        # Destination that receives the re-encoded bytes.
        self.stream = f
        # csv.writer renders into this scratch buffer, not into "f".
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        # One incremental encoder for the writer's whole lifetime, so any
        # codec state carries over from row to row.
        make_encoder = codecs.getincrementalencoder(encoding)
        self.encoder = make_encoder()

    def writerow(self, row):
        """Write one row (an iterable of unicode strings) to the stream."""
        # Cells are handed to the csv module as UTF-8 byte strings.
        utf8_cells = [cell.encode("utf-8") for cell in row]
        self.writer.writerow(utf8_cells)
        # Pull the rendered UTF-8 line back out of the scratch buffer,
        # turn it into unicode, and re-encode it for the target stream.
        rendered = self.queue.getvalue()
        self.stream.write(self.encoder.encode(rendered.decode("utf-8")))
        # Reset the scratch buffer so the next row starts empty.
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of "rows" in order, via writerow()."""
        for record in rows:
            self.writerow(record)

# Demo driver: write the sample rows in `data` to out.csv.
# The file is opened in binary mode ('wb') because UnicodeWriter hands
# already-encoded bytes to the stream's write() method.
with open('out.csv','wb') as f:
    # Default encoding is "utf-8-sig" (see UnicodeWriter.__init__).
    w = UnicodeWriter(f)
    w.writerows(data)

Post a Comment for "How To Remove String Unicode From List"