/*
 * Copyright 2003-2021 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "Converter.hxx"
21
#include "util/AllocatedString.hxx"
22
#include "config.h"
23

24 25
#include <fmt/format.h>

26
#include <iterator>
27
#include <stdexcept>
28 29 30

#include <string.h>

31 32
#ifdef HAVE_ICU
#include "Util.hxx"
33
#include "util/AllocatedArray.hxx"
34
#include <unicode/ucnv.h>
35
#elif defined(HAVE_ICONV)
36
#include "system/Error.hxx"
37 38
#endif

39 40 41 42 43 44 45 46 47
#ifdef HAVE_ICU

/**
 * Destructor for the ICU build (compiled only when HAVE_ICU is
 * defined): hands the #converter handle to ucnv_close().
 */
IcuConverter::~IcuConverter()
{
	ucnv_close(converter);
}

#endif

48 49
#ifdef HAVE_ICU_CONVERTER

50
std::unique_ptr<IcuConverter>
51
IcuConverter::Create(const char *charset)
52
{
53 54 55
#ifdef HAVE_ICU
	UErrorCode code = U_ZERO_ERROR;
	UConverter *converter = ucnv_open(charset, &code);
56
	if (converter == nullptr)
57 58
		throw std::runtime_error(fmt::format(FMT_STRING("Failed to initialize charset '{}': {}"),
						     charset, u_errorName(code)));
59

60
	return std::unique_ptr<IcuConverter>(new IcuConverter(converter));
61 62 63 64
#elif defined(HAVE_ICONV)
	iconv_t to = iconv_open("utf-8", charset);
	iconv_t from = iconv_open(charset, "utf-8");
	if (to == (iconv_t)-1 || from == (iconv_t)-1) {
65
		int e = errno;
66 67 68 69
		if (to != (iconv_t)-1)
			iconv_close(to);
		if (from != (iconv_t)-1)
			iconv_close(from);
70 71
		throw MakeErrno(e, fmt::format(FMT_STRING("Failed to initialize charset '{}'"),
					       charset).c_str());
72 73
	}

74
	return std::unique_ptr<IcuConverter>(new IcuConverter(to, from));
75
#endif
76 77
}

78
#ifdef HAVE_ICU
79 80
#elif defined(HAVE_ICONV)

81
/**
 * Convert the whole of @src with the iconv descriptor @conv and
 * return the result as a newly allocated string.
 *
 * The output is collected in a fixed 4096 byte stack buffer; if
 * iconv() reports an error (which includes running out of output
 * space), the exception built by MakeErrno() is thrown.
 *
 * @param conv an open iconv descriptor
 * @param src the bytes to be converted
 */
static AllocatedString
DoConvert(iconv_t conv, std::string_view src)
{
	/* put the descriptor back into its initial shift state so
	   leftovers from an earlier (possibly failed) conversion
	   cannot affect this one; this is the iconv counterpart of
	   the ucnv_reset*() calls in the ICU code path */
	iconv(conv, nullptr, nullptr, nullptr, nullptr);

	// TODO: dynamic buffer?
	char buffer[4096];
	/* iconv() takes a non-const input pointer */
	char *in = const_cast<char *>(src.data());
	char *out = buffer;
	size_t in_left = src.size();
	size_t out_left = sizeof(buffer);

	if (iconv(conv, &in, &in_left, &out, &out_left) == static_cast<size_t>(-1))
		throw MakeErrno("Charset conversion failed");

	if (in_left > 0)
		throw std::runtime_error("Charset conversion failed");

	/* flush: for stateful target encodings, this appends the
	   byte sequence which returns the output to the initial
	   shift state; it writes nothing for stateless encodings */
	if (iconv(conv, nullptr, nullptr, &out, &out_left) == static_cast<size_t>(-1))
		throw MakeErrno("Charset conversion failed");

	/* the bytes written are whatever is no longer free in the
	   buffer */
	return AllocatedString({buffer, sizeof(buffer) - out_left});
}

102 103
#endif

104
/**
 * Convert @s from this converter's charset to UTF-8.
 *
 * ICU build: under #mutex, the converter is reset, the input is
 * decoded into a stack buffer of 4096 UChars with ucnv_toUnicode()
 * and the result is passed to UCharToUTF8().  Throws
 * std::runtime_error if ICU reports a failure.
 *
 * iconv build: delegates to DoConvert() with the #to_utf8
 * descriptor.
 */
AllocatedString
IcuConverter::ToUTF8(std::string_view s) const
{
#ifdef HAVE_ICU
	const std::lock_guard<Mutex> protect(mutex);

	ucnv_resetToUnicode(converter);

	// TODO: dynamic buffer?
	UChar buffer[4096];
	UChar *target = buffer;
	const char *source = s.data();

	UErrorCode code = U_ZERO_ERROR;

	/* flush=true: this call carries the complete input */
	ucnv_toUnicode(converter, &target, buffer + std::size(buffer),
		       &source, source + s.size(),
		       nullptr, true, &code);

	/* U_FAILURE() is false for ICU warning codes, so only real
	   errors raise an exception */
	if (U_FAILURE(code))
		throw std::runtime_error(fmt::format(FMT_STRING("Failed to convert to Unicode: {}"),
						     u_errorName(code)));

	/* ucnv_toUnicode() has advanced "target" past the last UChar
	   it wrote */
	const size_t target_length = target - buffer;
	return UCharToUTF8({buffer, target_length});
#elif defined(HAVE_ICONV)
	return DoConvert(to_utf8, s);
#endif
}

132
/**
 * Convert the UTF-8 string @s to this converter's charset.
 *
 * ICU build: under #mutex, the input is turned into UChars with
 * UCharFromUTF8(), the converter is reset and ucnv_fromUnicode()
 * encodes into a 4096 byte stack buffer.  Throws
 * std::runtime_error if ICU reports a failure.
 *
 * iconv build: delegates to DoConvert() with the #from_utf8
 * descriptor.
 */
AllocatedString
IcuConverter::FromUTF8(std::string_view s) const
{
#ifdef HAVE_ICU
	const std::lock_guard<Mutex> protect(mutex);

	const auto u = UCharFromUTF8(s);

	ucnv_resetFromUnicode(converter);

	// TODO: dynamic buffer?
	char buffer[4096];
	char *target = buffer;
	const UChar *source = u.begin();

	UErrorCode code = U_ZERO_ERROR;

	/* flush=true: this call carries the complete input */
	ucnv_fromUnicode(converter, &target, buffer + std::size(buffer),
			 &source, u.end(),
			 nullptr, true, &code);

	/* U_FAILURE() is false for ICU warning codes, so only real
	   errors raise an exception */
	if (U_FAILURE(code))
		throw std::runtime_error(fmt::format(FMT_STRING("Failed to convert from Unicode: {}"),
						     u_errorName(code)));

	/* ucnv_fromUnicode() has advanced "target" past the last
	   byte it wrote */
	return AllocatedString({buffer, size_t(target - buffer)});

#elif defined(HAVE_ICONV)
	return DoConvert(from_utf8, s);
#endif
}

#endif