TagId3.cxx 14 KB
Newer Older
1
/*
Max Kellermann's avatar
Max Kellermann committed
2
 * Copyright (C) 2003-2014 The Music Player Daemon Project
3
 * http://www.musicpd.org
4 5 6 7 8 9 10 11 12 13
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
14 15 16 17
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 19
 */

20
#include "config.h"
Max Kellermann's avatar
Max Kellermann committed
21
#include "TagId3.hxx"
22
#include "TagHandler.hxx"
23
#include "TagTable.hxx"
24
#include "TagBuilder.hxx"
25
#include "util/Alloc.hxx"
26
#include "util/StringUtil.hxx"
27
#include "util/Error.hxx"
28 29
#include "util/Domain.hxx"
#include "Log.hxx"
30
#include "config/ConfigGlobal.hxx"
31 32
#include "Riff.hxx"
#include "Aiff.hxx"
33 34
#include "fs/Path.hxx"
#include "fs/FileSystem.hxx"
Max Kellermann's avatar
Max Kellermann committed
35

36
#ifdef HAVE_GLIB
37
#include <glib.h>
38 39
#endif

40 41
#include <id3tag.h>

42 43
#include <string>

44
#include <stdio.h>
45
#include <stdlib.h>
46 47
#include <string.h>

48 49 50 51 52 53 54
#  ifndef ID3_FRAME_COMPOSER
#    define ID3_FRAME_COMPOSER "TCOM"
#  endif
#  ifndef ID3_FRAME_DISC
#    define ID3_FRAME_DISC "TPOS"
#  endif

Bart Nagel's avatar
Bart Nagel committed
55 56 57 58
#ifndef ID3_FRAME_ARTIST_SORT
#define ID3_FRAME_ARTIST_SORT "TSOP"
#endif

59
#ifndef ID3_FRAME_ALBUM_ARTIST_SORT
Bart Nagel's avatar
Bart Nagel committed
60
#define ID3_FRAME_ALBUM_ARTIST_SORT "TSO2" /* this one is unofficial, introduced by iTunes */
61 62 63 64 65 66
#endif

#ifndef ID3_FRAME_ALBUM_ARTIST
#define ID3_FRAME_ALBUM_ARTIST "TPE2"
#endif

67 68
static constexpr Domain id3_domain("id3");

69 70 71 72 73 74
/**
 * Check whether the ID3_TAG_OPTION_ID3V1 bit is set in the options of
 * the given tag.
 */
static inline bool
tag_is_id3v1(struct id3_tag *tag)
{
	/* mask and values are both 0: this call is used as a query only */
	const int options = id3_tag_options(tag, 0, 0);

	return (options & ID3_TAG_OPTION_ID3V1) != 0;
}

75 76 77
/**
 * Look up field number #i of the frame, read its string and return a
 * UTF-8 duplicate of it.
 *
 * @return the duplicated string (callers in this file release it with
 * free()), or nullptr if the field does not exist or
 * id3_field_getstring() yields nothing
 */
static id3_utf8_t *
tag_id3_getstring(const struct id3_frame *frame, unsigned i)
{
	id3_field *const field = id3_frame_field(frame, i);

	const id3_ucs4_t *const ucs4 = field != nullptr
		? id3_field_getstring(field)
		: nullptr;

	return ucs4 != nullptr
		? id3_ucs4_utf8duplicate(ucs4)
		: nullptr;
}

89 90
/* This will try to convert a string to utf-8,
 */
Max Kellermann's avatar
Max Kellermann committed
91
static id3_utf8_t *
92
import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4)
93
{
94
	id3_utf8_t *utf8;
95

96
#ifdef HAVE_GLIB
97
	/* use encoding field here? */
98
	const char *encoding;
99
	if (is_id3v1 &&
Max Kellermann's avatar
Max Kellermann committed
100
	    (encoding = config_get_string(CONF_ID3V1_ENCODING, nullptr)) != nullptr) {
101
		id3_latin1_t *isostr = id3_ucs4_latin1duplicate(ucs4);
102
		if (gcc_unlikely(isostr == nullptr))
Max Kellermann's avatar
Max Kellermann committed
103
			return nullptr;
104 105 106

		utf8 = (id3_utf8_t *)
			g_convert_with_fallback((const char*)isostr, -1,
107
						"utf-8", encoding,
Max Kellermann's avatar
Max Kellermann committed
108 109 110
						nullptr, nullptr,
						nullptr, nullptr);
		if (utf8 == nullptr) {
111 112 113
			FormatWarning(id3_domain,
				      "Unable to convert %s string to UTF-8: '%s'",
				      encoding, isostr);
114
			free(isostr);
Max Kellermann's avatar
Max Kellermann committed
115
			return nullptr;
116
		}
117
		free(isostr);
118
	} else {
119 120 121
#else
		(void)is_id3v1;
#endif
122
		utf8 = id3_ucs4_utf8duplicate(ucs4);
123
		if (gcc_unlikely(utf8 == nullptr))
Max Kellermann's avatar
Max Kellermann committed
124
			return nullptr;
125
#ifdef HAVE_GLIB
126
	}
127
#endif
128

129
	id3_utf8_t *utf8_stripped = (id3_utf8_t *)
130
		xstrdup(Strip((char *)utf8));
131
	free(utf8);
132 133

	return utf8_stripped;
134 135
}

136 137 138 139 140 141 142
/**
 * Import a "Text information frame" (ID3v2.4.0 section 4.2).  It
 * contains 2 fields:
 *
 * - encoding
 * - string list
 */
143
static void
144
tag_id3_import_text_frame(struct id3_tag *tag, const struct id3_frame *frame,
145
			  TagType type,
146
			  const struct tag_handler *handler, void *handler_ctx)
147
{
148
	if (frame->nfields != 2)
149
		return;
150 151 152

	/* check the encoding field */

153
	const id3_field *field = id3_frame_field(frame, 0);
Max Kellermann's avatar
Max Kellermann committed
154
	if (field == nullptr || field->type != ID3_FIELD_TYPE_TEXTENCODING)
155
		return;
156

157 158 159
	/* process the value(s) */

	field = id3_frame_field(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
160
	if (field == nullptr || field->type != ID3_FIELD_TYPE_STRINGLIST)
161 162 163
		return;

	/* Get the number of strings available */
164 165 166
	const unsigned nstrings = id3_field_getnstrings(field);
	for (unsigned i = 0; i < nstrings; i++) {
		const id3_ucs4_t *ucs4 = id3_field_getstrings(field, i);
Max Kellermann's avatar
Max Kellermann committed
167
		if (ucs4 == nullptr)
168 169
			continue;

170
		if (type == TAG_GENRE)
171 172
			ucs4 = id3_genre_name(ucs4);

173
		id3_utf8_t *utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
Max Kellermann's avatar
Max Kellermann committed
174
		if (utf8 == nullptr)
175 176
			continue;

177 178
		tag_handler_invoke_tag(handler, handler_ctx,
				       type, (const char *)utf8);
179
		free(utf8);
180
	}
181 182
}

183 184 185 186 187
/**
 * Import all text frames with the specified id (ID3v2.4.0 section
 * 4.2).  This is a wrapper for tag_id3_import_text_frame().
 */
static void
188
tag_id3_import_text(struct id3_tag *tag, const char *id, TagType type,
189
		    const struct tag_handler *handler, void *handler_ctx)
190 191 192
{
	const struct id3_frame *frame;
	for (unsigned i = 0;
Max Kellermann's avatar
Max Kellermann committed
193
	     (frame = id3_tag_findframe(tag, id, i)) != nullptr; ++i)
194 195
		tag_id3_import_text_frame(tag, frame, type,
					  handler, handler_ctx);
196 197
}

198 199 200 201 202 203 204 205 206 207
/**
 * Import a "Comment frame" (ID3v2.4.0 section 4.10).  It
 * contains 4 fields:
 *
 * - encoding
 * - language
 * - string
 * - full string (we use this one)
 */
static void
208
tag_id3_import_comment_frame(struct id3_tag *tag,
209
			     const struct id3_frame *frame, TagType type,
210 211
			     const struct tag_handler *handler,
			     void *handler_ctx)
212
{
213
	if (frame->nfields != 4)
214 215 216
		return;

	/* for now I only read the 4th field, with the fullstring */
217
	const id3_field *field = id3_frame_field(frame, 3);
Max Kellermann's avatar
Max Kellermann committed
218
	if (field == nullptr)
219 220
		return;

221
	const id3_ucs4_t *ucs4 = id3_field_getfullstring(field);
Max Kellermann's avatar
Max Kellermann committed
222
	if (ucs4 == nullptr)
223 224
		return;

225
	id3_utf8_t *utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
Max Kellermann's avatar
Max Kellermann committed
226
	if (utf8 == nullptr)
227 228
		return;

229
	tag_handler_invoke_tag(handler, handler_ctx, type, (const char *)utf8);
230
	free(utf8);
231 232
}

233 234 235 236 237
/**
 * Import all comment frames (ID3v2.4.0 section 4.10).  This is a
 * wrapper for tag_id3_import_comment_frame().
 */
static void
238
tag_id3_import_comment(struct id3_tag *tag, const char *id, TagType type,
239
		       const struct tag_handler *handler, void *handler_ctx)
240 241 242
{
	const struct id3_frame *frame;
	for (unsigned i = 0;
Max Kellermann's avatar
Max Kellermann committed
243
	     (frame = id3_tag_findframe(tag, id, i)) != nullptr; ++i)
244 245
		tag_id3_import_comment_frame(tag, frame, type,
					     handler, handler_ctx);
246 247
}

248
/**
249
 * Parse a TXXX name, and convert it to a TagType enum value.
250 251
 * Returns TAG_NUM_OF_ITEM_TYPES if the TXXX name is not understood.
 */
252
static TagType
253 254
tag_id3_parse_txxx_name(const char *name)
{
255 256 257 258 259 260 261
	static const struct tag_table txxx_tags[] = {
		{ "ALBUMARTISTSORT", TAG_ALBUM_ARTIST_SORT },
		{ "MusicBrainz Artist Id", TAG_MUSICBRAINZ_ARTISTID },
		{ "MusicBrainz Album Id", TAG_MUSICBRAINZ_ALBUMID },
		{ "MusicBrainz Album Artist Id",
		  TAG_MUSICBRAINZ_ALBUMARTISTID },
		{ "MusicBrainz Track Id", TAG_MUSICBRAINZ_TRACKID },
262 263
		{ "MusicBrainz Release Track Id",
		  TAG_MUSICBRAINZ_RELEASETRACKID },
Max Kellermann's avatar
Max Kellermann committed
264
		{ nullptr, TAG_NUM_OF_ITEM_TYPES }
265 266
	};

267
	return tag_table_lookup(txxx_tags, name);
268 269 270 271 272 273
}

/**
 * Import all known MusicBrainz tags from TXXX frames.
 */
static void
274 275 276
tag_id3_import_musicbrainz(struct id3_tag *id3_tag,
			   const struct tag_handler *handler,
			   void *handler_ctx)
277 278
{
	for (unsigned i = 0;; ++i) {
279
		const id3_frame *frame = id3_tag_findframe(id3_tag, "TXXX", i);
Max Kellermann's avatar
Max Kellermann committed
280
		if (frame == nullptr)
281 282
			break;

283
		id3_utf8_t *name = tag_id3_getstring(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
284
		if (name == nullptr)
285 286
			continue;

287
		id3_utf8_t *value = tag_id3_getstring(frame, 2);
Max Kellermann's avatar
Max Kellermann committed
288
		if (value == nullptr)
289 290
			continue;

291 292 293 294
		tag_handler_invoke_pair(handler, handler_ctx,
					(const char *)name,
					(const char *)value);

295
		TagType type = tag_id3_parse_txxx_name((const char*)name);
296 297 298 299 300 301
		free(name);

		if (type != TAG_NUM_OF_ITEM_TYPES)
			tag_handler_invoke_tag(handler, handler_ctx,
					       type, (const char*)value);

302 303 304 305
		free(value);
	}
}

306 307 308 309
/**
 * Imports the MusicBrainz TrackId from the UFID tag.
 */
static void
310 311
tag_id3_import_ufid(struct id3_tag *id3_tag,
		    const struct tag_handler *handler, void *handler_ctx)
312 313
{
	for (unsigned i = 0;; ++i) {
314
		const id3_frame *frame = id3_tag_findframe(id3_tag, "UFID", i);
Max Kellermann's avatar
Max Kellermann committed
315
		if (frame == nullptr)
316 317
			break;

318
		id3_field *field = id3_frame_field(frame, 0);
Max Kellermann's avatar
Max Kellermann committed
319
		if (field == nullptr)
320 321
			continue;

322
		const id3_latin1_t *name = id3_field_getlatin1(field);
Max Kellermann's avatar
Max Kellermann committed
323
		if (name == nullptr ||
324 325 326 327
		    strcmp((const char *)name, "http://musicbrainz.org") != 0)
			continue;

		field = id3_frame_field(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
328
		if (field == nullptr)
329 330
			continue;

331 332 333
		id3_length_t length;
		const id3_byte_t *value =
			id3_field_getbinarydata(field, &length);
Max Kellermann's avatar
Max Kellermann committed
334
		if (value == nullptr || length == 0)
335 336
			continue;

337
		std::string p((const char *)value, length);
338
		tag_handler_invoke_tag(handler, handler_ctx,
339
				       TAG_MUSICBRAINZ_TRACKID, p.c_str());
340 341 342
	}
}

343
void
344 345 346 347 348 349 350 351 352
scan_id3_tag(struct id3_tag *tag,
	     const struct tag_handler *handler, void *handler_ctx)
{
	tag_id3_import_text(tag, ID3_FRAME_ARTIST, TAG_ARTIST,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ALBUM_ARTIST,
			    TAG_ALBUM_ARTIST, handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ARTIST_SORT,
			    TAG_ARTIST_SORT, handler, handler_ctx);
353 354 355

	tag_id3_import_text(tag, "TSOA", TAG_ALBUM_SORT, handler, handler_ctx);

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381
	tag_id3_import_text(tag, ID3_FRAME_ALBUM_ARTIST_SORT,
			    TAG_ALBUM_ARTIST_SORT, handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_TITLE, TAG_TITLE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ALBUM, TAG_ALBUM,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_TRACK, TAG_TRACK,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_YEAR, TAG_DATE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_GENRE, TAG_GENRE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_COMPOSER, TAG_COMPOSER,
			    handler, handler_ctx);
	tag_id3_import_text(tag, "TPE3", TAG_PERFORMER,
			    handler, handler_ctx);
	tag_id3_import_text(tag, "TPE4", TAG_PERFORMER, handler, handler_ctx);
	tag_id3_import_comment(tag, ID3_FRAME_COMMENT, TAG_COMMENT,
			       handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_DISC, TAG_DISC,
			    handler, handler_ctx);

	tag_id3_import_musicbrainz(tag, handler, handler_ctx);
	tag_id3_import_ufid(tag, handler, handler_ctx);
}

Max Kellermann's avatar
Max Kellermann committed
382 383
/**
 * Build a new Tag object from the given ID3 tag.
 *
 * @return the result of TagBuilder::CommitNew(), or nullptr if
 * scanning the ID3 tag left the builder empty
 */
Tag *
tag_id3_import(struct id3_tag *tag)
{
	TagBuilder builder;
	scan_id3_tag(tag, &add_tag_handler, &builder);

	if (builder.IsEmpty())
		return nullptr;

	return builder.CommitNew();
}

392
/**
 * Seek the stream to the given position and read up to #size bytes
 * into the buffer.
 *
 * @param offset, whence passed to fseek() unmodified
 * @return the number of bytes read (may be less than #size); 0 if
 * seeking failed
 */
static size_t
fill_buffer(void *buf, size_t size, FILE *stream, long offset, int whence)
{
	return fseek(stream, offset, whence) == 0
		? fread(buf, 1, size, stream)
		: 0;
}

399
static long
Max Kellermann's avatar
Max Kellermann committed
400
get_id3v2_footer_size(FILE *stream, long offset, int whence)
401 402
{
	id3_byte_t buf[ID3_TAG_QUERYSIZE];
403 404
	size_t bufsize = fill_buffer(buf, ID3_TAG_QUERYSIZE, stream, offset, whence);
	if (bufsize == 0) return 0;
405 406 407
	return id3_tag_query(buf, bufsize);
}

Max Kellermann's avatar
Max Kellermann committed
408 409
static struct id3_tag *
tag_id3_read(FILE *stream, long offset, int whence)
410 411
{
	/* It's ok if we get less than we asked for */
412 413 414
	id3_byte_t query_buffer[ID3_TAG_QUERYSIZE];
	size_t query_buffer_size = fill_buffer(query_buffer, ID3_TAG_QUERYSIZE,
					       stream, offset, whence);
Max Kellermann's avatar
Max Kellermann committed
415 416
	if (query_buffer_size <= 0)
		return nullptr;
417 418

	/* Look for a tag header */
419
	long tag_size = id3_tag_query(query_buffer, query_buffer_size);
Max Kellermann's avatar
Max Kellermann committed
420
	if (tag_size <= 0) return nullptr;
421 422

	/* Found a tag.  Allocate a buffer and read it in. */
423
	id3_byte_t *tag_buffer = new id3_byte_t[tag_size];
Max Kellermann's avatar
Max Kellermann committed
424 425
	int tag_buffer_size = fill_buffer(tag_buffer, tag_size,
					  stream, offset, whence);
Max Kellermann's avatar
Max Kellermann committed
426
	if (tag_buffer_size < tag_size) {
427
		delete[] tag_buffer;
Max Kellermann's avatar
Max Kellermann committed
428
		return nullptr;
429 430
	}

431
	id3_tag *tag = id3_tag_parse(tag_buffer, tag_buffer_size);
432
	delete[] tag_buffer;
433 434 435
	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
436 437
static struct id3_tag *
tag_id3_find_from_beginning(FILE *stream)
438
{
439
	id3_tag *tag = tag_id3_read(stream, 0, SEEK_SET);
440
	if (!tag) {
Max Kellermann's avatar
Max Kellermann committed
441
		return nullptr;
Max Kellermann's avatar
Max Kellermann committed
442
	} else if (tag_is_id3v1(tag)) {
443 444
		/* id3v1 tags don't belong here */
		id3_tag_delete(tag);
Max Kellermann's avatar
Max Kellermann committed
445
		return nullptr;
446 447 448
	}

	/* We have an id3v2 tag, so let's look for SEEK frames */
449
	id3_frame *frame;
450 451
	while ((frame = id3_tag_findframe(tag, "SEEK", 0))) {
		/* Found a SEEK frame, get it's value */
452
		int seek = id3_field_getint(id3_frame_field(frame, 0));
453 454 455 456
		if (seek < 0)
			break;

		/* Get the tag specified by the SEEK frame */
457
		id3_tag *seektag = tag_id3_read(stream, seek, SEEK_CUR);
Max Kellermann's avatar
Max Kellermann committed
458
		if (!seektag || tag_is_id3v1(seektag))
459 460 461 462 463 464 465 466 467 468
			break;

		/* Replace the old tag with the new one */
		id3_tag_delete(tag);
		tag = seektag;
	}

	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
469 470
static struct id3_tag *
tag_id3_find_from_end(FILE *stream)
471 472
{
	/* Get an id3v1 tag from the end of file for later use */
473
	id3_tag *v1tag = tag_id3_read(stream, -128, SEEK_END);
474 475

	/* Get the id3v2 tag size from the footer (located before v1tag) */
476
	int tagsize = get_id3v2_footer_size(stream, (v1tag ? -128 : 0) - 10, SEEK_END);
477 478 479 480
	if (tagsize >= 0)
		return v1tag;

	/* Get the tag which the footer belongs to */
481
	id3_tag *tag = tag_id3_read(stream, tagsize, SEEK_CUR);
482 483 484 485 486 487 488 489 490
	if (!tag)
		return v1tag;

	/* We have an id3v2 tag, so ditch v1tag */
	id3_tag_delete(v1tag);

	return tag;
}

491
static struct id3_tag *
492
tag_id3_riff_aiff_load(FILE *file)
493
{
Max Kellermann's avatar
Max Kellermann committed
494
	size_t size = riff_seek_id3(file);
495 496
	if (size == 0)
		size = aiff_seek_id3(file);
497
	if (size == 0)
Max Kellermann's avatar
Max Kellermann committed
498
		return nullptr;
499

500
	if (size > 4 * 1024 * 1024)
501
		/* too large, don't allocate so much memory */
Max Kellermann's avatar
Max Kellermann committed
502
		return nullptr;
503

504
	id3_byte_t *buffer = new id3_byte_t[size];
Max Kellermann's avatar
Max Kellermann committed
505
	size_t ret = fread(buffer, size, 1, file);
506
	if (ret != 1) {
507
		LogWarning(id3_domain, "Failed to read RIFF chunk");
508
		delete[] buffer;
Max Kellermann's avatar
Max Kellermann committed
509
		return nullptr;
510 511
	}

Max Kellermann's avatar
Max Kellermann committed
512
	struct id3_tag *tag = id3_tag_parse(buffer, size);
513
	delete[] buffer;
514 515 516
	return tag;
}

517
struct id3_tag *
518
tag_id3_load(Path path_fs, Error &error)
519
{
520
	FILE *file = FOpen(path_fs, "rb");
Max Kellermann's avatar
Max Kellermann committed
521
	if (file == nullptr) {
522
		error.FormatErrno("Failed to open file %s", path_fs.c_str());
Max Kellermann's avatar
Max Kellermann committed
523
		return nullptr;
524
	}
525

526
	struct id3_tag *tag = tag_id3_find_from_beginning(file);
Max Kellermann's avatar
Max Kellermann committed
527
	if (tag == nullptr) {
528
		tag = tag_id3_riff_aiff_load(file);
Max Kellermann's avatar
Max Kellermann committed
529
		if (tag == nullptr)
530
			tag = tag_id3_find_from_end(file);
531 532
	}

533 534 535
	fclose(file);
	return tag;
}
536

537
bool
538
tag_id3_scan(Path path_fs,
539 540
	     const struct tag_handler *handler, void *handler_ctx)
{
541 542
	Error error;
	struct id3_tag *tag = tag_id3_load(path_fs, error);
Max Kellermann's avatar
Max Kellermann committed
543
	if (tag == nullptr) {
544
		if (error.IsDefined())
545
			LogError(error);
546 547

		return false;
548
	}
549 550 551 552 553

	scan_id3_tag(tag, handler, handler_ctx);
	id3_tag_delete(tag);
	return true;
}