# # - Text::Unidecode - # This spec file was automatically generated by cpan2rpm [ver: 2.028] # The following arguments were used: # --keep-pipes Text::Unidecode 2>&1 # For more information on cpan2rpm please visit: http://perl.arix.com/ # %define pkgname Text-Unidecode %define filelist %{pkgname}-%{version}-filelist %define NVR %{pkgname}-%{version}-%{release} %define maketest 1 name: perl-Text-Unidecode summary: Text-Unidecode - - US-ASCII transliterations of Unicode text version: 0.04 release: 1 vendor: Sean M Burke packager: Arix International license: Artistic group: Applications/CPAN url: http://www.cpan.org buildroot: %{_tmppath}/%{name}-%{version}-%(id -u -n) buildarch: noarch prefix: %(echo %{_prefix}) source: http://search.cpan.org//CPAN/authors/id/S/SB/SBURKE/Text-Unidecode-0.04.tar.gz %description It often happens that you have non-Roman text data in Unicode, but you can't display it -- usually because you're trying to show it to a user via an application that doesn't support Unicode, or because the fonts you need aren't accessible. You could represent the Unicode characters as "???????" or "\15BA\15A0\1610...", but that's nearly useless to the user who actually wants to read what the text says. What Text::Unidecode provides is a function, `unidecode(...)' that takes Unicode data and tries to represent it in US-ASCII characters (i.e., the universally displayable characters between 0x00 and 0x7F). The representation is almost always an attempt at *transliteration* -- i.e., conveying, in Roman letters, the pronunciation expressed by the text in some other writing system. (See the example in the synopsis.) Unidecode's ability to transliterate is limited by two factors: =over =item * The amount and quality of data in the original So if you have Hebrew data that has no vowel points in it, then Unidecode cannot guess what vowels should appear in a pronounciation. S f y hv n vwls n th npt, y wn't gt ny vwls n th tpt. (This is a specific application of the general principle of "Garbage In, Garbage Out".) =item * Basic limitations in the Unidecode design Writing a real and clever transliteration algorithm for any single language usually requires a lot of time, and at least a passable knowledge of the language involved. But Unicode text can convey more languages than I could possibly learn (much less create a transliterator for) in the entire rest of my lifetime. So I put a cap on how intelligent Unidecode could be, by insisting that it support only context-*in*sensitive transliteration. That means missing the finer details of any given writing system, while still hopefully being useful. =back Unidecode, in other words, is quick and dirty. Sometimes the output is not so dirty at all: Russian and Greek seem to work passably; and while Thaana (Divehi, AKA Maldivian) is a definitely non-Western writing system, setting up a mapping from it to Roman letters seems to work pretty well. But sometimes the output is *very dirty:* Unidecode does quite badly on Japanese and Thai. If you want a smarter transliteration for a particular language than Unidecode provides, then you should look for (or write) a transliteration algorithm specific to that language, and apply it instead of (or at least before) applying Unidecode. In other words, Unidecode's approach is broad (knowing about dozens of writing systems), but shallow (not being meticulous about any of them). # # This package was generated automatically with the cpan2rpm # utility. To get this software or for more information # please visit: http://perl.arix.com/ # %prep %setup -q -n %{pkgname}-%{version} chmod -R u+w %{_builddir}/%{pkgname}-%{version} %build grep -rsl '^#!.*perl' . | grep -v '.bak$' |xargs --no-run-if-empty \ %__perl -MExtUtils::MakeMaker -e 'MY->fixin(@ARGV)' CFLAGS="$RPM_OPT_FLAGS" %{__perl} Makefile.PL `%{__perl} -MExtUtils::MakeMaker -e ' print qq|PREFIX=%{buildroot}%{_prefix}| if \$ExtUtils::MakeMaker::VERSION =~ /5\.9[1-6]|6\.0[0-5]/ '` %{__make} %if %maketest %{__make} test %endif %install [ "%{buildroot}" != "/" ] && rm -rf %{buildroot} %{makeinstall} `%{__perl} -MExtUtils::MakeMaker -e ' print \$ExtUtils::MakeMaker::VERSION <= 6.05 ? qq|PREFIX=%{buildroot}%{_prefix}| : qq|DESTDIR=%{buildroot}| '` cmd=/usr/share/spec-helper/compress_files [ -x $cmd ] || cmd=/usr/lib/rpm/brp-compress [ -x $cmd ] && $cmd # SuSE Linux if [ -e /etc/SuSE-release -o -e /etc/UnitedLinux-release ] then %{__mkdir_p} %{buildroot}/var/adm/perl-modules %{__cat} `find %{buildroot} -name "perllocal.pod"` \ | %{__sed} -e s+%{buildroot}++g \ > %{buildroot}/var/adm/perl-modules/%{name} fi # remove special files find %{buildroot} -name "perllocal.pod" \ -o -name ".packlist" \ -o -name "*.bs" \ |xargs -i rm -f {} # no empty directories find %{buildroot}%{_prefix} \ -type d -depth \ -exec rmdir {} \; 2>/dev/null %{__perl} -MFile::Find -le ' find({ wanted => \&wanted, no_chdir => 1}, "%{buildroot}"); print "%doc TODO.txt README"; for my $x (sort @dirs, @files) { push @ret, $x unless indirs($x); } print join "\n", sort @ret; sub wanted { return if /auto$/; local $_ = $File::Find::name; my $f = $_; s|^\Q%{buildroot}\E||; return unless length; return $files[@files] = $_ if -f $f; $d = $_; /\Q$d\E/ && return for reverse sort @INC; $d =~ /\Q$_\E/ && return for qw|/etc %_prefix/man %_prefix/bin %_prefix/share|; $dirs[@dirs] = $_; } sub indirs { my $x = shift; $x =~ /^\Q$_\E\// && $x ne $_ && return 1 for @dirs; } ' > %filelist [ -z %filelist ] && { echo "ERROR: empty %files listing" exit -1 } %clean [ "%{buildroot}" != "/" ] && rm -rf %{buildroot} %files -f %filelist %defattr(-,root,root) %changelog * Mon Nov 16 2009 rpm@janus.arix.com - Initial build.